KaiserShultz commited on
Commit
715a633
·
0 Parent(s):

Initial commit

Browse files
.env.example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ OPENAI_API_KEY=sk-yourkeyhere
2
+ TAVILY_API_KEY=tvly-yourkeyhere
3
+ GENAI_MODEL=gemma-3-27b-it
4
+ GOOGLE_API_KEY="your-google-api-key"
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+ *.egg-info/
9
+
10
+ # Virtual environments
11
+ venv/
12
+ env/
13
+ .venv/
14
+
15
+ # IDEs
16
+ .vscode/
17
+ .idea/
18
+ *.swp
19
+ *.swo
20
+
21
+ # OS
22
+ .DS_Store
23
+ .DS_Store?
24
+ ._*
25
+ .Spotlight-V100
26
+ .Trashes
27
+ ehthumbs.db
28
+ Thumbs.db
29
+
30
+ # Logs
31
+ *.log
32
+ logs/
33
+
34
+ # Environment variables
35
+ .env
36
+ .env.local
37
+ .env.*.local
38
+
39
+ # Dependencies
40
+ .pipenv/
41
+ Pipfile.lock
42
+
43
+ # Data and large files (your datasets)
44
+ data/*.xlsx
45
+ data/*.pdf
46
+ data/*.csv # If these are large; otherwise remove this rule
47
+ data/*.docx
48
+ output/ # Generated outputs
49
+
50
+ # Notebooks (if not committing)
51
+ *.ipynb_checkpoints/
52
+ test_*.ipynb # Scratch/test notebooks
53
+
54
+ # Build/dist
55
+ build/
56
+ dist/
57
+ *.egg
58
+
59
+ # Testing
60
+ .coverage
61
+ htmlcov/
62
+ .tox/
63
+ .cache/
64
+
65
+ # Mac
66
+ ._*
67
+
68
+ # Windows
69
+ Thumbs.db
70
+ ehthumbs.db
src/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ANKELODON: Core AI Agent Package.
2
+
3
+ Import key components for easy use:
4
+ from src import workflow, llm
5
+ """
6
+
7
+ from .config import llm, TOOLS, CONFIG, TOOL_NODE, planner_llm
8
+ from .agent import workflow, build_workflow, should_continue
9
+ from .nodes import agent, planner, query_input, critique
10
+ from .schemas import AgentState, PlannerPlan, ComplexityLevel, CritiqueFeedback
11
+
12
+ __version__ = "0.1.0"
13
+ __all__ = [
14
+ "llm", "TOOLS", "CONFIG", "TOOL_NODE", "planner_llm",
15
+ "workflow", "build_workflow", "should_continue",
16
+ "agent", "planner", "query_input", "critique",
17
+ "AgentState", "PlannerPlan", "ComplexityLevel", "CritiqueFeedback",
18
+ "__version__"
19
+ ]
src/agent.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #GRAPH BUILDING
2
+ from nodes import (query_input, complexity_assessor, planner, agent, simple_executor, critic_evaluator, replanner, enhanced_finalizer)
3
+ from state import AgentState
4
+ from langgraph.graph import StateGraph, END
5
+ from nodes import should_continue, should_use_planning, should_replan
6
+ from langgraph.checkpoint.memory import MemorySaver
7
+ from config import DEBUGGING_TOOL_NODE
8
+
9
+ def build_workflow(checkpointer=None) -> StateGraph[AgentState]:
10
+ builder = StateGraph(AgentState)
11
+ builder.add_node("INPUT", query_input)
12
+ builder.add_node("COMPLEXITY_ASSESSOR", complexity_assessor)
13
+ builder.add_node("PLANNING", planner)
14
+ builder.add_node("AGENT", agent)
15
+ builder.add_node("TOOLS", DEBUGGING_TOOL_NODE)
16
+ builder.add_node("FINALIZER", enhanced_finalizer)
17
+ builder.add_node("SIMPLE_EXECUTOR", simple_executor)
18
+ builder.add_node("CRITIC", critic_evaluator)
19
+ builder.add_node("REPLANNER", replanner)
20
+
21
+ builder.set_entry_point("INPUT")
22
+ builder.add_edge("INPUT", "COMPLEXITY_ASSESSOR")
23
+
24
+ builder.add_conditional_edges(
25
+ "COMPLEXITY_ASSESSOR",
26
+ should_use_planning,
27
+ {"simple_executor": "SIMPLE_EXECUTOR", "planner": "PLANNING"},
28
+ )
29
+ builder.add_edge("SIMPLE_EXECUTOR", "FINALIZER")
30
+
31
+
32
+ builder.add_edge("PLANNING", "AGENT")
33
+ builder.add_conditional_edges(
34
+ "AGENT",
35
+ should_continue,
36
+ {"tools": "TOOLS", "agent": "AGENT", "final_answer": "FINALIZER"},
37
+ )
38
+ builder.add_edge("TOOLS", "AGENT")
39
+ builder.add_edge("FINALIZER", "CRITIC")
40
+ builder.add_conditional_edges(
41
+ "CRITIC",
42
+ should_replan,
43
+ {"end": END, "replan": "REPLANNER"},
44
+ )
45
+ builder.add_edge("REPLANNER", "AGENT")
46
+
47
+ if checkpointer:
48
+ system = builder.compile(checkpointer=checkpointer)
49
+ else:
50
+ system = builder.compile()
51
+ return system
src/config.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import ChatOpenAI
2
+ from tools.tools import *
3
+ from tools.code_interpreter import safe_code_run
4
+ from langgraph.prebuilt import ToolNode
5
+ from schemas import PlannerPlan
6
+
7
+ config = {"configurable": {"thread_id": "1"}, "recursion_limit" : 50}
8
+
9
+ TOOLS = [download_file_from_url, web_search,
10
+ arxiv_search, wiki_search, add, subtract, multiply, divide,
11
+ power, analyze_excel_file, analyze_csv_file, analyze_docx_file,
12
+ analyze_pdf_file, analyze_txt_file, analyze_image_file,
13
+ vision_qa_gemma, safe_code_run]
14
+
15
+ class DebuggingToolNode(ToolNode):
16
+ def __init__(self, tools):
17
+ super().__init__(tools)
18
+
19
+ def __call__(self, state):
20
+ print("=== TOOL EXECUTION STARTED ===")
21
+ result = super().__call__(state)
22
+ print("=== TOOL EXECUTION COMPLETED ===")
23
+ return result
24
+
25
+
26
+ TOOL_NODE = ToolNode(TOOLS)
27
+ DEBUGGING_TOOL_NODE = DebuggingToolNode(TOOLS)
28
+
29
+ llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.25)
30
+ llm_with_tools = llm.bind_tools(TOOLS)
31
+ planner_llm = llm.with_structured_output(PlannerPlan)
32
+
33
+
34
+
35
+
src/nodes.py ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from state import AgentState
3
+ from tools.tools import preprocess_files
4
+ from langgraph.prebuilt import ToolNode
5
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
6
+ from prompts.prompts import SYSTEM_PROMPT_PLANNER, SYSTEM_EXECUTOR_PROMPT, COMPLEXITY_ASSESSOR_PROMPT, CRITIC_PROMPT
7
+ from config import llm, TOOLS, planner_llm, llm_with_tools
8
+ from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport, ToolExecution
9
+ from utils.utils import format_final_answer, clean_message_history
10
+
11
def query_input(state: "AgentState") -> "AgentState":
    """Entry node: fold metadata about attached files into the query text.

    When ``state['files']`` is non-empty, each file is preprocessed and a
    textual summary (name, type, size, suggested analysis tool, optional
    preview) is appended to ``state['query']`` so downstream LLM nodes can
    see it; the raw metadata dict is stored in ``state['file_contents']``.
    With no files the state passes through unchanged.
    """
    print("=== USER QUERY TRANSFERED TO AGENT ===")

    files = state.get("files", [])
    if files:
        print(f"Processing {len(files)} files:")
        file_info = preprocess_files(files)

        for file_path, info in file_info.items():
            print(f" - {file_path}: {info['type']} ({info['size']} bytes) -> {info['suggested_tool']}")

        state["file_contents"] = file_info
        file_context = "\n\n=== AVAILABLE FILES FOR ANALYSIS ===\n"
        for file_path, info in file_info.items():
            # BUGFIX: the basename was computed but never interpolated — the
            # header previously showed a literal placeholder, not the name.
            filename = os.path.basename(file_path)
            file_context += f"File: {filename}\n"
            file_context += f" - Type: {info['type']}\n"
            file_context += f" - Size: {info['size']} bytes\n"
            file_context += f" - Suggested tool: {info['suggested_tool']}\n"
            if info.get("preview"):
                file_context += f" - Preview: {info['preview']}\n"
            file_context += "\n"

        # Instructions for the agent on how to use the attached files.
        file_context += "IMPORTANT: Use the suggested tools to analyze these files before processing their data.\n"
        file_context += "File paths are available in the agent state and can be passed directly to analysis tools.\n"

        original_query = state.get("query", "")
        state["query"] = original_query + file_context
    return state
41
+
42
+
43
def planner(state: "AgentState") -> "AgentState":
    """Generate a structured PlannerPlan for the user query.

    Also resets the step tracker so the executor starts from the first
    plan step with a fresh reasoning phase.
    """
    sys_stack = [
        SystemMessage(content=SYSTEM_PROMPT_PLANNER.strip()),
        HumanMessage(content=state["query"]),
    ]
    plan: PlannerPlan = planner_llm.invoke(sys_stack)

    print("=== GENERATED PLAN ===")
    return {
        "messages": sys_stack + state["messages"],
        "plan": plan,
        # BUGFIX: the original key was "current_step " (trailing space), so
        # the real "current_step" counter was never reset by this node.
        "current_step": 0,
        "reasoning_done": False,
    }
55
+
56
+
57
def agent(state: "AgentState") -> "AgentState":
    """Execute the current plan step in two phases: reasoning, then tools.

    Phase 1 (``reasoning_done`` False): ask the plain LLM to reason about
    the current step inside <REASONING> tags, without calling tools.
    Phase 2 (``reasoning_done`` True): ask the tool-bound LLM to issue the
    actual tool call(s); the step counter advances only when a tool call
    was actually produced.
    """
    current_step = state.get("current_step", 0)
    reasoning_done = state.get("reasoning_done", False)
    # BUGFIX: the original unconditionally evaluated state["plan"].steps here,
    # which raised (KeyError/AttributeError) before the "no valid plan" guard
    # below could run. Also use a None default instead of {} so `not plan`
    # is meaningful.
    plan = state.get("plan")

    print(f"=== AGENT DEBUG ===")
    print(f"Current step: {current_step}")
    print(f"Reasoning done: {reasoning_done}")
    print(f"Plan exists: {plan is not None}")
    print(f"Total steps in plan: {len(plan.steps) if plan else 'No plan'}")

    if not plan or not hasattr(plan, 'steps') or not plan.steps:
        print("ERROR: No valid plan found!")
        return {
            "messages": state["messages"] + [AIMessage(content="No valid plan available. <FINAL_ANSWER>")],
            "reasoning_done": False
        }

    steps = plan.steps

    if current_step >= len(steps):
        print("All plan steps completed, moving to finalization")
        return {
            "messages": state["messages"] + [AIMessage(content="All steps completed. <FINAL_ANSWER>")],
            "reasoning_done": False
        }

    current_step_info = steps[current_step]
    print(f"Executing step {current_step + 1}: {current_step_info.description}")

    if not reasoning_done:
        # Describe the files attached to this session for the prompt.
        file_context = ""
        file_contents = state.get("file_contents", {})
        if file_contents:
            file_context = "\n\nAVAILABLE FILES IN CURRENT SESSION:\n"
            for filepath, info in file_contents.items():
                # BUGFIX: basename was computed but never interpolated.
                filename = os.path.basename(filepath)
                file_context += f"- {filename}: {info['type']} file, suggested tool: {info['suggested_tool']}\n"
                file_context += f"  Path: {filepath}\n"

        # BUGFIX: the original interpolated the raw dict {file_contents} and
        # never used the file_context summary it had just built.
        reasoning_prompt = f"""
{SYSTEM_EXECUTOR_PROMPT}

CURRENT TASK: You must perform reasoning for step {current_step + 1}.

STEP INFO: {current_step_info}

FILE CONTEXT: {file_context}

CRITICAL: You MUST output your reasoning in <REASONING> tags, but DO NOT call any tools yet.
Explain what you need to do and why, then end your response.

REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
"""

        sys_msg = SystemMessage(content=reasoning_prompt)
        stack = [sys_msg] + state["messages"]

        # Plain LLM: no tools bound during the reasoning phase.
        step = llm.invoke(stack)
        print("=== REASONING STEP ===")
        print(step.content)

        return {
            "messages": state["messages"] + [step],
            "reasoning_done": True
        }

    else:
        tool_prompt = f"""
Now execute the tool for step {current_step + 1}.

You have already done the reasoning. Now call the appropriate tool with the correct parameters.
Available file paths: {list(state.get("file_contents", {}).keys())}
IMPORTANT NOTE: IF YOU DECIDED TO USE safe_code_run, MAKE SURE TO FINISH CALCULATIONS WITH print() or saving to a variable NAMED 'result' so that the output can be captured!
AVAILABLE TOOLS: {', '.join([tool.name for tool in TOOLS])}
"""

        sys_msg = SystemMessage(content=tool_prompt)
        # Recent messages, including the reasoning turn just produced.
        stack = [sys_msg] + state["messages"]

        # Tool-bound model actually issues the call.
        step = llm_with_tools.invoke(stack)
        print("=== TOOL EXECUTION ===")
        print(f"Tool calls: {step.tool_calls}")

        return {
            "messages": state["messages"] + [step],
            # Advance only if a tool call was actually produced.
            "current_step": current_step + 1 if step.tool_calls else current_step,
            "reasoning_done": False  # reset for the next step's reasoning phase
        }
158
+
159
def should_continue(state: "AgentState") -> str:
    """Route from the AGENT node.

    Returns one of the conditional-edge labels: "final_answer", "tools" or
    "agent". (The original annotation said ``bool``, but string labels are
    what the graph's conditional edges expect.)
    """
    last_message = state["messages"][-1]
    plan = state.get("plan", None)
    current_step = state.get("current_step", 0)

    # All plan steps consumed -> finalize.
    if plan and current_step >= len(plan.steps):
        return "final_answer"

    # Explicit completion marker from the agent.
    if hasattr(last_message, "content") and "<FINAL_ANSWER>" in last_message.content:
        return "final_answer"
    # Pending tool calls go to the TOOLS node.
    if hasattr(last_message, "tool_calls") and last_message.tool_calls:
        return "tools"
    # Every remaining case (reasoning pending, reasoning just finished, or a
    # tool call still to be issued) loops back to the agent node — the
    # original's three elif branches all returned "agent", and its
    # `reasoning_done` local was never actually needed.
    return "agent"
183
+
184
# Debugging wrapper around ToolNode (also defined in config.py — consider
# keeping a single definition there).
class DebuggingToolNode(ToolNode):
    """ToolNode that prints markers around tool execution.

    NOTE(review): depending on the langgraph version, nodes may be dispatched
    via ``invoke`` rather than ``__call__`` — verify the markers actually
    print at runtime.
    """

    # The original also defined an __init__ that only delegated to
    # super().__init__ — removed as redundant.

    def __call__(self, state):
        print("=== TOOL EXECUTION STARTED ===")
        result = super().__call__(state)
        print("=== TOOL EXECUTION COMPLETED ===")
        return result
194
+
195
+
196
+
197
def enhanced_finalizer(state: "AgentState") -> "AgentState":
    """Generate a structured ExecutionReport for the critic to evaluate.

    Collects the tool calls and URL data sources found in the message
    history, summarizes the plan that was used, then asks the LLM (with
    structured output) to produce an ExecutionReport. Also formats the
    user-facing final answer.
    """
    import re  # hoisted: the original re-imported this inside the loop

    print("=== GENERATING EXECUTION REPORT ===")

    # Extract tool execution information from the conversation.
    tools_executed = []
    data_sources = []

    for msg in state["messages"]:
        if hasattr(msg, 'tool_calls') and msg.tool_calls:
            for tool_call in msg.tool_calls:
                tools_executed.append(ToolExecution(
                    tool_name=tool_call['name'],
                    arguments=str(tool_call['args']),
                    call_id=tool_call['id']
                ))

        # Treat URLs mentioned in message content as data sources.
        if hasattr(msg, 'content') and isinstance(msg.content, str):
            data_sources.extend(re.findall(r'https?://[^\s]+', msg.content))

    # Plan summary, when a plan was used.
    plan = state.get("plan")
    approach_used = "Direct execution"
    assumptions_made = []
    if plan:
        approach_used = f"{plan.task_type} approach with {len(plan.steps)} steps"
        assumptions_made = plan.assumptions
    # NOTE(review): approach_used / assumptions_made / data_sources are
    # collected but not interpolated into the prompt below (same as the
    # original) — consider feeding them in or dropping them.

    # BUGFIX: the original did state.get('complexity_assessment', {}).level,
    # which raises AttributeError whenever the key is missing (a dict has no
    # .level attribute). getattr with a default is safe either way.
    complexity_level = getattr(state.get('complexity_assessment'), 'level', 'unknown')

    report_generator_prompt = f"""
    Generate a comprehensive execution report for the following query processing:

    ORIGINAL QUERY: {state['query']}

    EXECUTION CONTEXT:
    - Complexity Level: {complexity_level}
    - Plan Used: {plan if plan else {}}
    - Tools Executed: {tools_executed}
    - Available Files: {list(state.get('file_contents', {}).keys())}

    CONVERSATION HISTORY:
    {[msg.content[:200] + "..." if len(msg.content) > 200 else msg.content
      for msg in state['messages'][-5:]]}

    Based on this information, create a structured execution report that includes:
    1. Query summary
    2. Approach used
    3. Key findings from the execution
    4. Data sources used
    5. Your confidence level in the results
    6. Any limitations or caveats
    7. The final answer

    Be thorough but concise. This report will be evaluated by a critic for quality assurance.
    """

    report_llm = llm.with_structured_output(ExecutionReport)

    execution_report = report_llm.invoke([
        SystemMessage(content=report_generator_prompt),
        HumanMessage(content="Generate the execution report.")
    ])

    print(f"Report generated - Confidence: {execution_report.confidence_level}")
    print(f"Key findings: {len(execution_report.key_findings)}")
    print(f"Data sources: {len(execution_report.data_sources)}")

    # Format the final answer for the user.
    formatted_answer = format_final_answer(execution_report, state.get('complexity_assessment', {}))
    print(execution_report)
    return {
        "execution_report": execution_report,
        "final_answer": formatted_answer
    }
276
+
277
+
278
def simple_executor(state: AgentState) -> AgentState:
    """Handle simple queries directly without planning.

    A single call to the tool-bound LLM; its raw response content becomes
    the final answer.
    """
    print("=== SIMPLE EXECUTION ===")

    # File paths attached to this session, if any.
    file_paths = list(state.get('file_contents', {}).keys())
    simple_prompt = f"""
    Answer this simple query directly and efficiently: {state['query']}

    You have access to tools if needed, but try to answer directly when possible.
    If you need files, they are available at: {file_paths}

    Provide a clear, concise answer.
    """

    response = llm_with_tools.invoke([
        SystemMessage(content=simple_prompt),
        HumanMessage(content=state['query']),
    ])

    return {
        "messages": state["messages"] + [response],
        "final_answer": response.content,
    }
301
+
302
+
303
def should_use_planning(state: AgentState) -> str:
    """Route after complexity assessment: skip planning for simple queries."""
    complexity = state["complexity_assessment"]

    # Only a query judged "simple" AND flagged as not needing a plan takes
    # the direct route; everything else goes through the planner.
    take_direct_route = complexity.level == "simple" and not complexity.needs_planning
    return "simple_executor" if take_direct_route else "planner"
311
+
312
+
313
def critic_evaluator(state: AgentState) -> AgentState:
    """Enhanced critic that evaluates execution reports.

    Feeds the ExecutionReport produced by the finalizer through CRITIC_PROMPT
    and asks the LLM for structured CritiqueFeedback; also increments the
    iteration counter that should_replan() uses as a budget.
    """
    print("=== ENHANCED ANSWER CRITIQUE ===")

    # NOTE(review): assumes the finalizer always ran first — if the key is
    # missing, report is None and the attribute accesses below raise; confirm.
    report = state.get("execution_report")
    critic_llm = llm.with_structured_output(CritiqueFeedback)

    critique_prompt = CRITIC_PROMPT.format(
        query=report.query_summary,
        approach=report.approach_used,
        tools=report.tools_executed,
        findings=report.key_findings,
        sources=report.data_sources,
        confidence=report.confidence_level,
        limitations=report.limitations,
        answer=report.final_answer
    )

    critique = critic_llm.invoke([
        SystemMessage(content=critique_prompt),
        HumanMessage(content="Evaluate this execution report thoroughly.")
    ])

    print(f"Quality Score: {critique.quality_score}/10")
    print(f"Complete: {critique.is_complete}")
    print(f"Accurate: {critique.is_accurate}")

    if critique.errors_found:
        print(f"Issues found: {critique.errors_found}")

    if critique.needs_replanning:
        print(f"Replanning needed: {critique.replan_instructions}")

    return {
        "critique_feedback": critique,
        # Counts critic passes; should_replan() stops at max_iterations.
        "iteration_count": state.get("iteration_count", 0) + 1
    }
350
+
351
+
352
+
353
def should_replan(state: AgentState) -> str:
    """Decide whether to accept the answer, replan, or stop.

    Returns "end" (accept/stop) or "replan". The answer is accepted when the
    critic is satisfied (score >= 7 or no replanning requested) or when the
    iteration budget is exhausted.
    """
    critique = state.get("critique_feedback")
    iteration_count = state.get("iteration_count", 0)
    max_iterations = state.get("max_iterations", 3)

    print(f"=== REPLAN DECISION ===")
    print(f"Iteration: {iteration_count}/{max_iterations}")
    print(f"Quality score: {critique.quality_score if critique else 'N/A'}")
    print(f"Needs replanning: {critique.needs_replanning if critique else 'N/A'}")

    # No critique to act on -> nothing to revise.
    if not critique:
        return "end"

    # Budget exhausted -> accept whatever we have.
    if iteration_count >= max_iterations:
        print(f"Max iterations ({max_iterations}) reached. Accepting current answer.")
        return "end"

    # Critic satisfied -> done.
    if critique.quality_score >= 7 or not critique.needs_replanning:
        print("Quality acceptable, ending execution")
        return "end"

    # Reaching here means needs_replanning is True and the budget check above
    # already passed, so the original's extra guard here was always true.
    print("Replanning due to critic feedback...")
    return "replan"
384
+
385
def replanner(state: AgentState) -> AgentState:
    """Create a revised plan based on critic feedback.

    Builds a replanning prompt from the critique, invokes the structured
    planner, then prunes the message history down to the essential system /
    human messages so the next execution round starts clean.
    """
    print("=== REPLANNING ===")

    critique = state["critique_feedback"]
    previous_plan = state.get("plan")

    replan_prompt = f"""
    {SYSTEM_PROMPT_PLANNER}

    REPLANNING CONTEXT:
    Original Query: {state['query']}
    Previous Plan: {previous_plan if previous_plan else {}}

    CRITIC FEEDBACK:
    - Quality Score: {critique.quality_score}/10
    - Issues Found: {critique.errors_found}
    - Missing Elements: {critique.missing_elements}
    - Improvement Suggestions: {critique.suggested_improvements}
    - Specific Instructions: {critique.replan_instructions}

    Create a REVISED plan that addresses these issues. Focus on fixing the identified problems.
    """

    revised_plan = planner_llm.invoke([
        SystemMessage(content=replan_prompt),
        HumanMessage(content="Create a revised plan based on the feedback.")
    ])

    print("Plan revised based on critic feedback")

    # Strip incomplete tool_call/response pairs from the history.
    current_messages = state.get("messages", [])
    cleaned_messages = clean_message_history(current_messages)

    # Keep only system messages and the initial user request.
    essential_messages = []
    for msg in cleaned_messages:
        if isinstance(msg, (SystemMessage, HumanMessage)):
            # Preserve assessment-related system messages and every human query.
            if ("complexity" in msg.content.lower() or
                "assess" in msg.content.lower() or
                isinstance(msg, HumanMessage)):
                essential_messages.append(msg)

    print(f"Cleaned message history: {len(current_messages)} -> {len(essential_messages)} messages")

    # Reset step tracking and drop the stale report for the next round.
    return {
        "plan": revised_plan,
        "current_step": 0,
        "reasoning_done": False,
        "messages": essential_messages,
        "execution_report": None
    }
439
+
440
+
441
def complexity_assessor(state: AgentState) -> AgentState:
    """Assess query complexity and determine if planning is needed.

    Uses structured output (ComplexityLevel) so the router can read
    ``.level`` / ``.needs_planning`` directly.
    """
    print("=== COMPLEXITY ASSESSMENT ===")

    complexity_llm = llm.with_structured_output(ComplexityLevel)

    assessment_message = [
        SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
        HumanMessage(content=f"Query: {state['query']}")
    ]

    assessment = complexity_llm.invoke(assessment_message)

    print(f"Complexity: {assessment.level}")
    print(f"Needs planning: {assessment.needs_planning}")
    print(f"Reasoning: {assessment.reasoning}")

    return {
        "complexity_assessment": assessment,
        # Append the assessment exchange so later nodes (and replanner
        # pruning, which matches on "complexity"/"assess") can see it.
        "messages": state["messages"] + assessment_message
    }
src/notebook_port/__init__.py ADDED
File without changes
src/notebook_port/test_unstable_port.py ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # === AUTO-GENERATED FROM test_.ipynb (do not edit logic) ===
2
+ # Only additive imports below to resolve package paths.
3
+ import sys, os
4
+ from pathlib import Path as _Path
5
+
6
+ # Ensure project root is importable when running as a module
7
+ _CUR = _Path(__file__).resolve()
8
+ _SRC = _CUR.parent.parent
9
+ _ROOT = _SRC.parent
10
+ if str(_ROOT) not in sys.path:
11
+ sys.path.insert(0, str(_ROOT))
12
+
13
+ # Prefer package-qualified imports; leave original notebook imports untouched below.
14
+ try:
15
+ from src.prompts import * # noqa: F401,F403
16
+ from src.schemas import * # noqa: F401,F403
17
+ from src.tools import * # noqa: F401,F403
18
+ from src.tools.code_interpreter import safe_code_run # noqa: F401
19
+ except Exception:
20
+ # Fallbacks if executed inside src as working directory
21
+ pass
22
+
23
+
24
+ # === CELL 0 FROM NOTEBOOK ===
25
+ import math
26
+ from langgraph.checkpoint.memory import MemorySaver
27
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
28
+ import uuid
29
+ from prompts import *
30
+ from schemas import *
31
+ from typing import Sequence
32
+ from langchain_core.messages import BaseMessage
33
+ from langgraph.graph.message import add_messages
34
+
35
+ import os, io, json, base64
36
+ from typing import Optional, Dict, Any, List
37
+ from langchain_core.tools import tool
38
+
39
+ # pip install google-generativeai pillow
40
+ import google.generativeai as genai
41
+ from PIL import Image
42
+ from langgraph.prebuilt import ToolNode
43
+
44
+ from dotenv import load_dotenv
45
+ import pandas as pd
46
+ from IPython.display import display, Image
47
+ from langchain_community.document_loaders import DataFrameLoader, TextLoader
48
+ from langchain_community.vectorstores import Chroma
49
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
50
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
51
+ from langchain.schema import Document
52
+ from langchain.schema.output_parser import StrOutputParser
53
+ import pickle
54
+
55
+
56
+ from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
57
+ from pydantic import BaseModel, Field
58
+
59
+ from typing import List, TypedDict, Annotated, Literal, Optional, Union
60
+
61
+ from langgraph.graph import StateGraph, END
62
+
63
+ load_dotenv()
64
+ import os
65
+ import json
66
+ import re
67
+ import operator
68
+
69
+ from langgraph.store.memory import InMemoryStore
70
+ in_memory_store = InMemoryStore() #сохраняем состояние между запусками
71
+
72
+ from IPython.display import Image, display
73
+
74
+ from langgraph.checkpoint.memory import MemorySaver
75
+ from langgraph.graph import StateGraph, MessagesState, START, END
76
+ from langgraph.store.base import BaseStore
77
+
78
+ from langchain_core.messages import HumanMessage, SystemMessage
79
+ from langchain_core.runnables.config import RunnableConfig
80
+ from PIL import Image, ImageStat, ExifTags
81
+ import pandas as pd
82
+
83
+
84
+ #TOOLS
85
+
86
+ from tools import (web_search, arxiv_search, wiki_search, add, subtract, multiply, divide, power,
87
+ analyze_csv_file, analyze_docx_file, analyze_pdf_file, analyze_txt_file, analyze_image_file, vision_qa_gemma, analyze_excel_file, preprocess_files, save_and_read_file, download_file_from_url)
88
+
89
+ from code_interpreter import safe_code_run
90
+
91
+
92
+ # === CELL 1 FROM NOTEBOOK ===
93
+
94
def clean_message_history(messages):
    """Drop incomplete tool_call/response cycles from a message history.

    An AIMessage carrying tool_calls is kept only when every one of its call
    ids is answered by an immediately-following ToolMessage; otherwise the
    whole block (the AIMessage plus any partial responses) is removed. All
    other messages pass through untouched.
    """
    kept = []
    idx = 0
    total = len(messages)

    while idx < total:
        current = messages[idx]

        if not (hasattr(current, 'tool_calls') and current.tool_calls):
            # Ordinary message: keep it and move on.
            kept.append(current)
            idx += 1
            continue

        # Gather the ToolMessage responses that directly follow.
        expected_ids = {call['id'] for call in current.tool_calls}
        answered = set()
        scan = idx + 1
        while scan < total and isinstance(messages[scan], ToolMessage):
            if messages[scan].tool_call_id in expected_ids:
                answered.add(messages[scan].tool_call_id)
            scan += 1

        if answered == expected_ids:
            # Complete block: keep the AIMessage and all of its responses.
            kept.extend(messages[idx:scan])
        else:
            print(f"Removing incomplete tool call block: {expected_ids - answered}")
        idx = scan

    return kept
135
+
136
+ # === CELL 2 FROM NOTEBOOK ===
137
+ llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.25)
138
+ TOOLS = [download_file_from_url, web_search, arxiv_search, wiki_search, add, subtract, multiply, divide, power, analyze_excel_file, analyze_csv_file, analyze_docx_file, analyze_pdf_file, analyze_txt_file, analyze_image_file, vision_qa_gemma, safe_code_run]
139
+
140
+ # === CELL 3 FROM NOTEBOOK ===
141
+ llm_with_tools = llm.bind_tools(TOOLS)
142
+ config = {"configurable": {"thread_id": "1"}, "recursion_limit" : 50}
143
+ TOOL_NODE = ToolNode(TOOLS)
144
+ planner_llm = llm.with_structured_output(PlannerPlan)
145
+
146
+ class AgentState(MessagesState):
147
+ query: str
148
+ final_answer: str
149
+ plan: Optional[PlannerPlan]
150
+ complexity_assessment: ComplexityLevel
151
+ current_step: int
152
+ reasoning_done: bool
153
+ messages : Annotated[Sequence[BaseMessage], add_messages]
154
+ files: List[str]
155
+ file_contents: Dict[str, Any]
156
+ critique_feedback: Optional[CritiqueFeedback]
157
+ iteration_count :int
158
+ max_iterations: int
159
+ execution_report : ExecutionReport
160
+
161
+
162
+
163
def query_input(state: "AgentState") -> "AgentState":
    """Entry node: fold metadata about attached files into the query text.

    When ``state['files']`` is non-empty, each file is preprocessed and a
    textual summary (name, type, size, suggested analysis tool, optional
    preview) is appended to ``state['query']``; the raw metadata dict is
    stored in ``state['file_contents']``. With no files the state passes
    through unchanged.
    """
    print("=== USER QUERY TRANSFERED TO AGENT ===")

    files = state.get("files", [])
    if files:
        print(f"Processing {len(files)} files:")
        file_info = preprocess_files(files)

        for file_path, info in file_info.items():
            print(f" - {file_path}: {info['type']} ({info['size']} bytes) -> {info['suggested_tool']}")

        state["file_contents"] = file_info
        file_context = "\n\n=== AVAILABLE FILES FOR ANALYSIS ===\n"
        for file_path, info in file_info.items():
            # BUGFIX: the basename was computed but never interpolated — the
            # header previously showed a literal placeholder, not the name.
            filename = os.path.basename(file_path)
            file_context += f"File: {filename}\n"
            file_context += f" - Type: {info['type']}\n"
            file_context += f" - Size: {info['size']} bytes\n"
            file_context += f" - Suggested tool: {info['suggested_tool']}\n"
            if info.get("preview"):
                file_context += f" - Preview: {info['preview']}\n"
            file_context += "\n"

        # Instructions for the agent on how to use the attached files.
        file_context += "IMPORTANT: Use the suggested tools to analyze these files before processing their data.\n"
        file_context += "File paths are available in the agent state and can be passed directly to analysis tools.\n"

        original_query = state.get("query", "")
        state["query"] = original_query + file_context
    return state
193
+
194
+
195
def planner(state : AgentState) -> AgentState:
    """Generate a structured PlannerPlan for the (possibly file-augmented) query.

    Returns a partial state update: the planning prompt is prepended to the
    message log, the plan is stored, and the step counters are reset.
    """
    sys_stack = [
        SystemMessage(content=SYSTEM_PROMPT_PLANNER.strip()),
        HumanMessage(content=state["query"]),
    ]
    plan: PlannerPlan = planner_llm.invoke(sys_stack)

    print("=== GENERATED PLAN ===")
    return {
        "messages": sys_stack + state["messages"],
        "plan": plan,
        # BUG FIX: the key was "current_step " (trailing space), so the real
        # "current_step" counter was never reset after planning.
        "current_step": 0,
        "reasoning_done": False,
    }
207
+
208
def agent(state: AgentState) -> AgentState:
    """Execute the current plan step in two phases: reason first, then act.

    Phase 1 (``reasoning_done`` is False): the plain LLM is asked to emit
    <REASONING> for the current step without calling tools.
    Phase 2 (``reasoning_done`` is True): the tool-bound LLM performs the
    actual tool call; ``current_step`` advances only if a tool was called.

    Returns a partial state update (messages plus step-tracking fields).
    """
    current_step = state.get("current_step", 0)
    reasoning_done = state.get("reasoning_done", False)
    # BUG FIX: the old code read state["plan"].steps *before* validating the
    # plan, raising AttributeError whenever plan was None.
    plan = state.get("plan")

    print(f"=== AGENT DEBUG ===")
    print(f"Current step: {current_step}")
    print(f"Reasoning done: {reasoning_done}")
    print(f"Plan exists: {plan is not None}")
    print(f"Total steps in plan: {len(plan.steps) if plan else 'No plan'}")

    if not plan or not getattr(plan, "steps", None):
        print("ERROR: No valid plan found!")
        return {
            "messages": state["messages"] + [AIMessage(content="No valid plan available. <FINAL_ANSWER>")],
            "reasoning_done": False,
        }

    steps = plan.steps

    if current_step >= len(steps):
        print("All plan steps completed, moving to finalization")
        return {
            "messages": state["messages"] + [AIMessage(content="All steps completed. <FINAL_ANSWER>")],
            "reasoning_done": False,
        }

    current_step_info = steps[current_step]
    print(f"Executing step {current_step + 1}: {current_step_info.description}")

    if not reasoning_done:
        # Build a human-readable manifest of the session's files for the prompt.
        file_context = ""
        file_contents = state.get("file_contents", {})
        if file_contents:
            file_context = "\n\nAVAILABLE FILES IN CURRENT SESSION:\n"
            for filepath, info in file_contents.items():
                filename = os.path.basename(filepath)
                file_context += f"- {filename}: {info['type']} file, suggested tool: {info['suggested_tool']}\n"
                file_context += f"  Path: {filepath}\n"

        # BUG FIX: the prompt now interpolates the formatted file_context;
        # previously it embedded the raw file_contents dict and the carefully
        # built file_context string was dead code.
        reasoning_prompt = f"""
{SYSTEM_EXECUTOR_PROMPT}

CURRENT TASK: You must perform reasoning for step {current_step + 1}.

STEP INFO: {current_step_info}\n\n

FILE CONTEXT: {file_context}

CRITICAL: You MUST output your reasoning in <REASONING> tags, but DO NOT call any tools yet.
Explain what you need to do and why, then end your response.

REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
"""

        sys_msg = SystemMessage(content=reasoning_prompt)
        stack = [sys_msg] + state["messages"]

        step = llm.invoke(stack)
        print("=== REASONING STEP ===")
        print(step.content)

        return {
            "messages": state["messages"] + [step],
            "reasoning_done": True,
        }

    else:
        tool_prompt = f"""
Now execute the tool for step {current_step + 1}.

You have already done the reasoning. Now call the appropriate tool with the correct parameters.
Available file paths: {list(state.get("file_contents", {}).keys())}\n
IMPORTANT NOTE: IF YOU DECIDED TO USE safe_code_run, MAKE SURE TO FINISH CALCULATIONS WITH print() or saving to a variable NAMED 'result' so that the output can be captured!
AVAILABLE TOOLS: {', '.join([tool.name for tool in TOOLS])}
"""

        sys_msg = SystemMessage(content=tool_prompt)
        # Recent messages (including the reasoning turn) provide context.
        stack = [sys_msg] + state["messages"]

        # Use the tool-bound model for the execution phase.
        step = llm_with_tools.invoke(stack)
        print("=== TOOL EXECUTION ===")
        print(f"Tool calls: {step.tool_calls}")

        return {
            "messages": state["messages"] + [step],
            # Advance only when a tool was actually called.
            "current_step": current_step + 1 if step.tool_calls else current_step,
            # Reset so the next step starts with its reasoning phase.
            "reasoning_done": False,
        }
309
+
310
+
311
def should_continue(state : AgentState) -> str:
    """Route from the AGENT node: 'final_answer', 'tools', or loop to 'agent'.

    BUG FIX: the return annotation claimed ``bool`` while every branch
    returns a route string; corrected to ``str``. The three trailing
    branches of the original all returned "agent" and are collapsed into a
    single fall-through.
    """
    last_message = state["messages"][-1]
    plan = state.get("plan", None)
    current_step = state.get("current_step", 0)

    # All plan steps consumed -> go straight to finalization.
    if plan and current_step >= len(plan.steps):
        return "final_answer"

    # Explicit completion marker emitted by the agent.
    if hasattr(last_message, "content") and "<FINAL_ANSWER>" in last_message.content:
        return "final_answer"

    # Pending tool calls -> dispatch to the tool node.
    if getattr(last_message, "tool_calls", None):
        return "tools"

    # Otherwise keep cycling the agent (reasoning or execution phase).
    return "agent"
335
+
336
+ # 6. Добавить отладочную информацию в TOOL_NODE
337
# Wrap the tool node so every tool execution is bracketed by debug output.
class DebuggingToolNode(ToolNode):
    """ToolNode subclass that logs entry/exit of each tool execution.

    The redundant pass-through ``__init__`` (which only called
    ``super().__init__(tools)``) was removed; the inherited constructor
    already has exactly that signature.
    """

    def __call__(self, state):
        print("=== TOOL EXECUTION STARTED ===")
        result = super().__call__(state)
        print("=== TOOL EXECUTION COMPLETED ===")
        return result


DEBUGGING_TOOL_NODE = DebuggingToolNode(TOOLS)
348
+
349
+
350
+
351
+ """
352
+ def summary(state : AgentState) -> AgentState:
353
+ print("=== FINAL ANSWER ===")
354
+ summarizer_prompt =
355
+ Now you have to provide final answer for the user query : {query}
356
+ In messages below you have all the context you need.
357
+
358
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, Apply the rules above for each element (number or string), ensure there is exactly one space after each comma.
359
+ Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
360
+
361
+ Here is the context:
362
+ {messages}
363
+
364
+ REMEMBER AND STRICTLY FOLLOW THE FORMATTING RULES ABOVE. ALWAYS USE THIS FORMAT:
365
+ FINAL ANSWER: ...
366
+
367
+
368
+ state["final_answer"] = llm.invoke([SystemMessage(content=summarizer_prompt.strip().format(query=state["query"], messages = state["messages"]))])
369
+ return state
370
+ """
371
+
372
def enhanced_finalizer(state: AgentState) -> AgentState:
    """Generate a comprehensive execution report for critic evaluation.

    Collects tool calls and URL data sources from the message history, asks
    the LLM for a structured ExecutionReport, and formats the user-facing
    final answer from it.
    """
    import re  # local import kept from the original; hoisted out of the loop

    print("=== GENERATING EXECUTION REPORT ===")

    # Extract tool execution information.
    tools_executed = []
    data_sources = []

    url_pattern = re.compile(r'https?://[^\s]+')
    for msg in state["messages"]:
        if hasattr(msg, 'tool_calls') and msg.tool_calls:
            for tool_call in msg.tool_calls:
                tools_executed.append(ToolExecution(
                    tool_name=tool_call['name'],
                    arguments=str(tool_call['args']),
                    call_id=tool_call['id']
                ))

        # Harvest URLs mentioned in tool results as candidate data sources.
        if hasattr(msg, 'content') and isinstance(msg.content, str):
            data_sources.extend(url_pattern.findall(msg.content))

    # Get plan information if available.
    plan = state.get("plan")
    approach_used = "Direct execution"
    assumptions_made = []

    if plan:
        approach_used = f"{plan.task_type} approach with {len(plan.steps)} steps"
        assumptions_made = plan.assumptions

    # BUG FIX: the old prompt read state.get('complexity_assessment', {}).level,
    # which raised AttributeError whenever the assessment was missing.
    complexity = state.get('complexity_assessment')
    complexity_level = getattr(complexity, 'level', 'unknown')

    # BUG FIX: msg.content can be a list for multimodal messages; coerce to
    # str before slicing so the history preview never raises TypeError.
    recent_history = []
    for msg in state['messages'][-5:]:
        text = msg.content if isinstance(msg.content, str) else str(msg.content)
        recent_history.append(text[:200] + "..." if len(text) > 200 else text)

    report_generator_prompt = f"""
Generate a comprehensive execution report for the following query processing:

ORIGINAL QUERY: {state['query']}

EXECUTION CONTEXT:
- Complexity Level: {complexity_level}
- Plan Used: {plan if plan else {}}
- Approach: {approach_used}
- Assumptions: {assumptions_made}
- Tools Executed: {tools_executed}
- Data Sources Seen: {data_sources[:10]}
- Available Files: {list(state.get('file_contents', {}).keys())}

CONVERSATION HISTORY:
{recent_history}

Based on this information, create a structured execution report that includes:
1. Query summary
2. Approach used
3. Key findings from the execution
4. Data sources used
5. Your confidence level in the results
6. Any limitations or caveats
7. The final answer

Be thorough but concise. This report will be evaluated by a critic for quality assurance.
"""

    report_llm = llm.with_structured_output(ExecutionReport)

    execution_report = report_llm.invoke([
        SystemMessage(content=report_generator_prompt),
        HumanMessage(content="Generate the execution report.")
    ])

    print(f"Report generated - Confidence: {execution_report.confidence_level}")
    print(f"Key findings: {len(execution_report.key_findings)}")
    print(f"Data sources: {len(execution_report.data_sources)}")

    # BUG FIX: pass the assessment object itself (may be None) rather than a
    # bare {} so format_final_answer can read .level defensively.
    formatted_answer = format_final_answer(execution_report, complexity)
    print(execution_report)
    return {
        "execution_report": execution_report,
        "final_answer": formatted_answer
    }
451
+
452
def format_final_answer(report: ExecutionReport, complexity) -> str:
    """Format the user-facing answer based on complexity and report content.

    BUG FIX: the parameter was annotated ``dict`` but the body dereferenced
    ``complexity.level``; callers sometimes pass ``{}`` (the old finalizer
    fallback), which crashed with AttributeError. The level is now read with
    ``getattr`` so any object, ``{}``, or ``None`` is accepted; anything
    without a ``level`` attribute gets the detailed (non-simple) format.
    """
    level = getattr(complexity, "level", None)

    if level == 'simple':
        # Simple queries get only the bare answer line.
        return f"FINAL ANSWER: {report.final_answer}"

    # Complex/moderate queries get a detailed, sectioned response.
    findings_block = "\n".join(f"• {finding}" for finding in report.key_findings)
    formatted = (
        f"FINAL ANSWER: {report.final_answer}\n"
        f"\n"
        f"SUMMARY:\n"
        f"{report.query_summary}\n"
        f"\n"
        f"KEY FINDINGS:\n"
        f"{findings_block}"
    )

    if report.data_sources:
        # Limit to 5 sources to keep the answer compact.
        sources_block = "\n".join(f"• {source}" for source in report.data_sources[:5])
        formatted += f"\n\nSOURCES:\n{sources_block}"

    if report.limitations:
        limits_block = "\n".join(f"• {limitation}" for limitation in report.limitations)
        formatted += f"\n\nLIMITATIONS:\n{limits_block}"

    return formatted
481
+
482
+
483
def complexity_assessor(state: AgentState) -> AgentState:
    """Classify the query (simple/moderate/complex) to decide whether to plan."""
    print("=== COMPLEXITY ASSESSMENT ===")

    structured_assessor = llm.with_structured_output(ComplexityLevel)

    prompt_stack = [
        SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
        HumanMessage(content=f"Query: {state['query']}"),
    ]

    verdict = structured_assessor.invoke(prompt_stack)

    print(f"Complexity: {verdict.level}")
    print(f"Needs planning: {verdict.needs_planning}")
    print(f"Reasoning: {verdict.reasoning}")

    return {
        "complexity_assessment": verdict,
        "messages": state["messages"] + prompt_stack,
    }
504
+
505
+
506
def simple_executor(state: AgentState) -> AgentState:
    """Handle simple queries directly without planning."""
    print("=== SIMPLE EXECUTION ===")

    # The tool-bound LLM answers in one shot; tools remain available but the
    # prompt encourages a direct answer.
    available_paths = list(state.get('file_contents', {}).keys())
    simple_prompt = f"""
    Answer this simple query directly and efficiently: {state['query']}

    You have access to tools if needed, but try to answer directly when possible.
    If you need files, they are available at: {available_paths}

    Provide a clear, concise answer.
    """

    response = llm_with_tools.invoke([
        SystemMessage(content=simple_prompt),
        HumanMessage(content=state['query']),
    ])

    return {
        "messages": state["messages"] + [response],
        "final_answer": response.content,
    }
529
+
530
+
531
def should_use_planning(state: AgentState) -> str:
    """Route based on complexity assessment: bypass planning for trivial queries."""
    assessment = state["complexity_assessment"]
    is_trivial = assessment.level == "simple" and not assessment.needs_planning
    return "simple_executor" if is_trivial else "planner"
539
+
540
+ """
541
+ def critic_evaluator(state: AgentState) -> AgentState:
542
+
543
+ print("=== ANSWER CRITIQUE ===")
544
+
545
+ critic_llm = llm.with_structured_output(CritiqueFeedback)
546
+
547
+ # Gather tool execution results for context
548
+ tool_results = []
549
+ for msg in state["messages"]:
550
+ if hasattr(msg, 'tool_calls') and msg.tool_calls:
551
+ tool_results.extend([f"Tool: {tc['name']}, Args: {tc['args']}" for tc in msg.tool_calls])
552
+
553
+ if state.get("plan"):
554
+ terra = state.get("plan")
555
+ else:
556
+ terra = "No plan used"
557
+ critique_prompt = CRITIC_PROMPT.format(
558
+ query=state["query"],
559
+ plan=terra,
560
+ answer=state["final_answer"],
561
+ tool_results=tool_results[:5] #Limit context
562
+ )
563
+
564
+ critique = critic_llm.invoke([
565
+ SystemMessage(content=critique_prompt),
566
+ HumanMessage(content="Please evaluate this answer.")
567
+ ])
568
+
569
+ print(f"Quality Score: {critique.quality_score}/10")
570
+ print(f"Complete: {critique.is_complete}")
571
+ print(f"Accurate: {critique.is_accurate}")
572
+ if critique.errors_found:
573
+ print(f"Errors: {critique.errors_found}")
574
+ if critique.needs_replanning:
575
+ print(f"Needs replanning: {critique.replan_instructions}")
576
+
577
+ return {
578
+ "critique_feedback": critique,
579
+ "iteration_count": state.get("iteration_count", 0) + 1
580
+ }
581
+ """
582
+
583
def critic_evaluator(state: AgentState) -> AgentState:
    """Enhanced critic that evaluates execution reports."""
    print("=== ENHANCED ANSWER CRITIQUE ===")

    report = state.get("execution_report")
    structured_critic = llm.with_structured_output(CritiqueFeedback)

    # Fill the rubric template with every section of the execution report.
    rubric = CRITIC_PROMPT.format(
        query=report.query_summary,
        approach=report.approach_used,
        tools=report.tools_executed,
        findings=report.key_findings,
        sources=report.data_sources,
        confidence=report.confidence_level,
        limitations=report.limitations,
        answer=report.final_answer,
    )

    verdict = structured_critic.invoke([
        SystemMessage(content=rubric),
        HumanMessage(content="Evaluate this execution report thoroughly."),
    ])

    print(f"Quality Score: {verdict.quality_score}/10")
    print(f"Complete: {verdict.is_complete}")
    print(f"Accurate: {verdict.is_accurate}")
    if verdict.errors_found:
        print(f"Issues found: {verdict.errors_found}")
    if verdict.needs_replanning:
        print(f"Replanning needed: {verdict.replan_instructions}")

    return {
        "critique_feedback": verdict,
        "iteration_count": state.get("iteration_count", 0) + 1,
    }
620
+
621
+
622
+
623
def should_replan(state: AgentState) -> str:
    """Decide whether to accept the answer ('end') or revise the plan ('replan')."""
    critique = state.get("critique_feedback")
    iteration_count = state.get("iteration_count", 0)
    max_iterations = state.get("max_iterations", 3)

    print(f"=== REPLAN DECISION ===")
    print(f"Iteration: {iteration_count}/{max_iterations}")
    print(f"Quality score: {critique.quality_score if critique else 'N/A'}")
    print(f"Needs replanning: {critique.needs_replanning if critique else 'N/A'}")

    # No critique available: nothing to act on.
    if not critique:
        return "end"

    # Hard stop once the iteration budget is spent.
    if iteration_count >= max_iterations:
        print(f"Max iterations ({max_iterations}) reached. Accepting current answer.")
        return "end"

    # Accept when the score clears the bar or the critic did not ask to replan.
    if critique.quality_score >= 7 or not critique.needs_replanning:
        print("Quality acceptable, ending execution")
        return "end"

    # Remaining case: replanning requested with budget left.
    print("Replanning due to critic feedback...")
    return "replan"
654
+
655
def replanner(state: AgentState) -> AgentState:
    """Create a revised plan based on critic feedback.

    Embeds the critic's findings into the planner prompt so the revision
    targets the specific failures, then prunes the message history so the
    next AGENT pass does not replay stale tool-call turns.
    """
    print("=== REPLANNING ===")

    critique = state["critique_feedback"]
    previous_plan = state.get("plan")

    replan_prompt = f"""
{SYSTEM_PROMPT_PLANNER}

REPLANNING CONTEXT:
Original Query: {state['query']}
Previous Plan: {previous_plan if previous_plan else {}}

CRITIC FEEDBACK:
- Quality Score: {critique.quality_score}/10
- Issues Found: {critique.errors_found}
- Missing Elements: {critique.missing_elements}
- Improvement Suggestions: {critique.suggested_improvements}
- Specific Instructions: {critique.replan_instructions}

Create a REVISED plan that addresses these issues. Focus on fixing the identified problems.
"""

    revised_plan = planner_llm.invoke([
        SystemMessage(content=replan_prompt),
        HumanMessage(content="Create a revised plan based on the feedback.")
    ])

    print("Plan revised based on critic feedback")

    # Strip incomplete tool_calls from the history (a dangling tool call with
    # no matching tool result makes the next LLM invocation invalid).
    current_messages = state.get("messages", [])
    cleaned_messages = clean_message_history(current_messages)

    # Keep only system messages and the initial user request.
    essential_messages = []
    for msg in cleaned_messages:
        if isinstance(msg, (SystemMessage, HumanMessage)):
            # Preserve assessor-related system messages and any user query.
            if ("complexity" in msg.content.lower() or
                "assess" in msg.content.lower() or
                isinstance(msg, HumanMessage)):
                essential_messages.append(msg)

    print(f"Cleaned message history: {len(current_messages)} -> {len(essential_messages)} messages")

    # Reset step tracking and drop the stale report so FINALIZER rebuilds it.
    return {
        "plan": revised_plan,
        "current_step": 0,
        "reasoning_done": False,
        "messages": essential_messages,
        "execution_report": None
    }
709
+
710
# === CELL 4 FROM NOTEBOOK ===
# GRAPH BUILDING
#
# Topology:
#   INPUT -> COMPLEXITY_ASSESSOR -> SIMPLE_EXECUTOR -> FINALIZER
#                                -> PLANNING -> AGENT <-> TOOLS
#   AGENT -> FINALIZER -> CRITIC -> END | REPLANNER -> AGENT

builder = StateGraph(AgentState)
builder.add_node("INPUT", query_input)
builder.add_node("COMPLEXITY_ASSESSOR", complexity_assessor)
builder.add_node("PLANNING", planner)
builder.add_node("AGENT", agent)
builder.add_node("TOOLS", DEBUGGING_TOOL_NODE)
builder.add_node("FINALIZER", enhanced_finalizer)
builder.add_node("SIMPLE_EXECUTOR", simple_executor)
builder.add_node("CRITIC", critic_evaluator)
builder.add_node("REPLANNER", replanner)

builder.set_entry_point("INPUT")
builder.add_edge("INPUT", "COMPLEXITY_ASSESSOR")

# Simple queries bypass planning entirely.
builder.add_conditional_edges(
    "COMPLEXITY_ASSESSOR",
    should_use_planning,
    {"simple_executor": "SIMPLE_EXECUTOR", "planner": "PLANNING"},
)
builder.add_edge("SIMPLE_EXECUTOR", "FINALIZER")


builder.add_edge("PLANNING", "AGENT")
# AGENT loops on itself (reasoning phase), dispatches to TOOLS, or finalizes.
builder.add_conditional_edges(
    "AGENT",
    should_continue,
    {"tools": "TOOLS", "agent": "AGENT", "final_answer": "FINALIZER"},
)
builder.add_edge("TOOLS", "AGENT")
builder.add_edge("FINALIZER", "CRITIC")
# Critic either accepts (END) or requests a revised plan.
builder.add_conditional_edges(
    "CRITIC",
    should_replan,
    {"end": END, "replan": "REPLANNER"},
)
builder.add_edge("REPLANNER", "AGENT")


# In-memory checkpointer gives thread-scoped state across invocations.
system = builder.compile(checkpointer=MemorySaver())
752
+
753
# === CELL 5 FROM NOTEBOOK ===
# Smoke-test invocation of the compiled graph.
# BUG FIX: the initial-state key was "files_contents"; the state schema and
# every node read "file_contents".
workflow = system.invoke(
    {
        "query": "How many cumulative milliliters of fluid is in all the opaque-capped vials without stickers in the 114 version of the kit that was used for the PromethION long-read sequencing in the paper De Novo-Whole Genome Assembly of the Roborovski Dwarf Hamster (Phodopus roborovskii) Genome?",
        "current_step": 0,
        "reasoning_done": False,
        "files": [],
        "file_contents": {},
        "iteration_count": 0,
        "max_iterations": 10,
        "plan": None,
    },
    config=config,
)

# === CELL 6 FROM NOTEBOOK ===
for message in workflow["messages"]:
    message.pretty_print()

print("\n=== FINAL ANSWER ===")

# === CELL 7 FROM NOTEBOOK ===
# BUG FIX: a bare expression is a no-op in a script; print the answer.
print(workflow["final_answer"])

# === CELL 8 FROM NOTEBOOK ===
# (The notebook cell displayed the full workflow state; harmless as a statement.)
workflow

# === CELL 9 FROM NOTEBOOK ===
# TO-DO:
# - improve image generation and plots/tables creation
# - add more tools (e.g. calendar, email, pdf editing, file system)
# - UI creation
src/prompts/__init__.py ADDED
File without changes
src/prompts/prompts.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SYSTEM_PROMPT_PLANNER = """
2
+ You are the PLANNER of a multi-tool agent (GAIA I–II level). Produce a minimal, reliable plan to solve the user's request using available tools. You DO NOT call tools; output ONLY a JSON plan. Tools are bound via .bind_tools()—use EXACT names.
3
+
4
+ CORE RULES:
5
+ - MINIMALITY: 1-3 steps max; chain only essentials (e.g., search → download → analyze).
6
+ - ROUTING: Classify as info (web facts), calc (math on known data), table (CSV/Excel agg), doc_qa (PDF/DOCX/TXT extract), image_qa (IMG OCR/vision), multi_hop (anything cross-modality or research—default for unknowns).
7
+ - PREREQUISITES: For external docs/images (e.g., "paper X", URLs): ALWAYS start with web_search/arxiv_search → download_file_from_url (local path like "paper.pdf") → analyze_*. NEVER assume local files—validate existence implicitly via chain.
8
+ - COST-AWARE: Cheap first: search snippets > full download > compute. No raw files to safe_code_run—extract first.
9
+ - EVIDENCE: Mandate citations/pages for facts; units/rounding explicit in guidelines.
10
+ - FALLBACKS: Every step needs success_criteria; on_fail="replan" (default) or "sN" (jump). Add 1 fallback step if high-risk (e.g., no-results → alt query).
11
+
12
+ ROUTING PATTERNS (MANDATORY CHAINS):
13
+ - info: web_search/wiki_search/arxiv_search → cite snippets.
14
+ - calc: If data missing, insert extract step → safe_code_run (e.g., "sum volumes from text").
15
+ - table: analyze_csv_file/analyze_excel_file (preview) → safe_code_run (agg/query).
16
+ - doc_qa: web_search("paper title PDF") → download_file_from_url → analyze_pdf_file/analyze_docx_file (query="vials fluid ml") → safe_code_run if sum needed.
17
+ - image_qa: web_search → download_file_from_url → analyze_image_file/vision_qa_gemma → safe_code_run for chart-to-table.
18
+ - multi_hop: Decompose (e.g., sub-query1: search; sub-query2: extract) → synthesize.
19
+
20
+ Output ONLY valid JSON:
21
+ {
22
+ "task_type": "info|calc|table|doc_qa|image_qa|multi_hop",
23
+ "assumptions": ["..."], // 0-2 max; e.g., "Paper details vials explicitly"
24
+ "plan_rationale": "Brief: why route + key tools/chain", // 1 sentence
25
+ "steps": [ // 1-3 only
26
+ {
27
+ "id": "s1",
28
+ "description": "Precise action + why (e.g., 'web_search for paper PDF to locate source')",
29
+ "evidence_needed": ["citations","page_numbers","stats_check"], // 1-3
30
+ "success_criteria": "e.g., 'Top result has PDF URL; or data extracted'",
31
+ "on_fail": "replan|sN", // Default: replan
32
+ "outputs_to_state": ["e.g., 'pdf_url', 'extracted_text'"] // For chaining
33
+ }
34
+ ],
35
+ "answer_guidelines": {
36
+ "final_answer_template": "e.g., 'Cumulative volume: X mL (from [cite])'",
37
+ "citations_required": true,
38
+ "min_citations": 1,
39
+ "units_policy": "e.g., 'mL; convert if cm³'",
40
+ "rounding_policy": "e.g., 'Nearest integer'",
41
+ "include_artifacts": ["snippets","tables"] // 0-2
42
+ }
43
+ }
44
+
45
+ CONSTRAINTS:
46
+ - Valid JSON only—no extras. If query trivial (no tools), task_type="info" with 0 steps.
47
+ - Exact tool names: web_search, download_file_from_url, analyze_pdf_file, safe_code_run, etc.
48
+ - For research: If no chain, replan triggers auto-fix.
49
+ """
50
+
51
+ SYSTEM_EXECUTOR_PROMPT = """
52
+ ROLE: EXECUTOR of multi-tool agent (GAIA level). You follow the FIXED {plan} EXACTLY—no changes, no new steps. Current step: {current_step_id} ("{step_desc}"). Advance ONE step per response.
53
+
54
+ EXECUTION RULES:
55
+ - BEFORE EVERY TOOL: <REASONING> (2-3 sentences: What step? Why this tool? Exact inputs? Expected output?) </REASONING>
56
+ - THEN: Tool call ONLY for this step (exact name/args from plan). NO OTHER OUTPUT.
57
+ - NO TOOLS? Direct output (e.g., "Calc: 5 mL") + set reasoning_done=True.
58
+ - Check state for priors (e.g., if s2 needs pdf_url from s1, wait/replan if missing).
59
+ - On fail (bad output): <REASONING>Assess + on_fail action</REASONING> then tool or stop.
60
+ - END STEP: If success, output "STEP COMPLETE: {outputs_to_state}" to advance.
61
+
62
+ RESOURCE CHAIN (MANDATORY IF NEEDED):
63
+ - External doc? Use plan's search→download before analyze.
64
+ - NEVER guess paths—use state["files"] or replan.
65
+
66
+ OUTPUT FORMAT: <REASONING>...</REASONING> [tool call or direct] [STEP COMPLETE if done]. NO JSON/PLANS/MARKDOWN.
67
+
68
+ FAILSAFE: If unclear, <REASONING>Replan needed</REASONING> and stop.
69
+ DO NOT FORGET TO ADD <FINAL_ANSWER> IF YOU THINK IT'S TIME TO ANSWER THE USER AND YOU HAVE ALL THE DATA FOR EXACT ANSWER.
70
+ """
71
+
72
+
73
+ COMPLEXITY_ASSESSOR_PROMPT = """
74
+ You are a COMPLEXITY ASSESSOR for a multi-tool agent system.
75
+ Your job is to analyze user queries and determine their complexity level and processing requirements.
76
+
77
+ COMPLEXITY LEVELS:
78
+ 1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use
79
+ - Examples: "What is 2+2?", "Define photosynthesis", "What's the capital of France?"
80
+
81
+ 2. MODERATE: Questions requiring 1-3 tool calls or basic analysis
82
+ - Examples: "Search for recent news about AI", "Analyze this CSV file", "What's the weather tomorrow?"
83
+
84
+ 3. COMPLEX: Multi-step problems requiring planning, multiple tools, or sophisticated reasoning
85
+ - Examples: Research tasks, multi-file analysis, calculations with dependencies, creative projects
86
+
87
+ ASSESSMENT CRITERIA:
88
+ - Number of steps likely needed
89
+ - Tool complexity and dependencies
90
+ - Data processing requirements
91
+ - Need for intermediate reasoning
92
+ - Risk of failure without proper planning
93
+
94
+ RULES:
95
+ - SIMPLE queries bypass planning entirely
96
+ - MODERATE queries may use lightweight planning
97
+ - COMPLEX queries require full planning with fallbacks
98
+ - When in doubt, err toward higher complexity
99
+
100
+ Analyze the query and respond with your assessment.
101
+ """
102
+
103
+ CRITIC_PROMPT = """
104
+ You are the CRITIC of a multi-tool agent system.
105
+ Your job is to evaluate execution reports and provide detailed feedback.
106
+
107
+ EVALUATION FRAMEWORK:
108
+
109
+ 1. COMPLETENESS (0-3 points):
110
+ - 3: Fully addresses all aspects of the query
111
+ - 2: Addresses main aspects, minor gaps
112
+ - 1: Partial answer, significant gaps
113
+ - 0: Incomplete or off-topic
114
+
115
+ 2. ACCURACY (0-3 points):
116
+ - 3: All information appears accurate and well-sourced
117
+ - 2: Mostly accurate, minor issues
118
+ - 1: Some accuracy concerns
119
+ - 0: Significant accuracy problems
120
+
121
+ 3. METHODOLOGY (0-2 points):
122
+ - 2: Appropriate tools and approach used
123
+ - 1: Acceptable approach, could be better
124
+ - 0: Poor methodology or tool selection
125
+
126
+ 4. EVIDENCE (0-2 points):
127
+ - 2: Strong evidence and sources provided
128
+ - 1: Some evidence provided
129
+ - 0: Insufficient evidence
130
+
131
+ TOTAL SCORE: /10 points
132
+
133
+ DECISION THRESHOLDS:
134
+ - 8-10: Accept (excellent quality)
135
+ - 6-7: Accept with minor notes
136
+ - 4-5: Marginal, consider replanning
137
+ - 0-3: Reject, requires replanning
138
+
139
+ EXECUTION REPORT TO EVALUATE:
140
+ Query: {query}
141
+ Approach: {approach}
142
+ Tools Used: {tools}
143
+ Key Findings: {findings}
144
+ Sources: {sources}
145
+ Confidence: {confidence}
146
+ Limitations: {limitations}
147
+ Final Answer: {answer}
148
+
149
+ Provide detailed critique focusing on what works well and what could be improved.
150
+ """
src/schemas.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional, Literal, Iterable
2
+ from pydantic import BaseModel, Field, ValidationError
3
+
4
+
5
class ComplexityLevel(BaseModel):
    """Structured verdict of the complexity-assessor node; drives planning routing."""

    level: Literal["simple", "moderate", "complex"] = Field(description="Complexity level of the query")
    reasoning: str = Field(description="Explanation for the complexity assessment")
    needs_planning: bool = Field(description="Whether this query requires detailed planning")
    suggested_approach: str = Field(description="Recommended approach for handling this query")
10
+
11
class CritiqueFeedback(BaseModel):
    """Structured critic verdict on an execution report; drives the replan loop."""

    quality_score: int = Field(ge=1, le=10, description="Quality score from 1-10")
    is_complete: bool = Field(description="Whether the answer is complete")
    is_accurate: bool = Field(description="Whether the answer appears accurate")
    missing_elements: List[str] = Field(default_factory=list, description="What's missing from the answer")
    errors_found: List[str] = Field(default_factory=list, description="Potential errors identified")
    suggested_improvements: List[str] = Field(default_factory=list, description="Suggestions for improvement")
    needs_replanning: bool = Field(description="Whether the plan should be revised")
    replan_instructions: Optional[str] = Field(default=None, description="Instructions for replanning")
20
+
21
+
22
+
23
# Closed vocabularies referenced by the planner schema below.
TaskType = Literal["info", "calc", "table", "doc_qa", "image_qa", "multi_hop"]
EvidenceTag = Literal["citations", "page_numbers", "figure_captions", "stats_check", "unit_check"]
25
+
26
class PlanStep(BaseModel):
    """A single step of a PlannerPlan, with success criteria and failure routing."""

    id: str  # step identifier, e.g. "s1"
    description: str  # precise action plus its rationale
    # tool: Optional[str] = Field(default=None, description="Exact tool name or null for reasoning step")
    # args_hint: Dict[str, Any] = Field(default_factory=dict)
    evidence_needed: List[EvidenceTag] = Field(default_factory=list)
    success_criteria: str  # how the executor knows this step succeeded
    on_fail: str = Field(default="replan", description="One of: 'replan' | 'stop' | step-id")
    outputs_to_state: List[str] = Field(default_factory=list)  # state keys this step is expected to produce
35
+
36
class AnswerGuidelines(BaseModel):
    """Formatting contract for the final answer (citations, units, rounding)."""

    final_answer_template: str  # e.g. "Cumulative volume: X mL (from [cite])"
    citations_required: bool = False
    min_citations: int = 0
    units_policy: Optional[str] = None  # e.g. "mL; convert if cm³"
    rounding_policy: Optional[str] = None  # e.g. "Nearest integer"
    include_artifacts: List[str] = Field(default_factory=list)  # e.g. ["snippets", "tables"]
43
+
44
class PlannerPlan(BaseModel):
    """Top-level structured output of the planner LLM: route, steps, and answer rules."""

    task_type: TaskType
    assumptions: List[str] = Field(default_factory=list)  # at most a couple, per the planner prompt
    plan_rationale: str  # one-sentence justification of the chosen route
    steps: List[PlanStep]
    answer_guidelines: AnswerGuidelines
50
+
51
+
52
class ToolExecution(BaseModel):
    """Record of one tool call harvested from the message history."""

    tool_name: str  # tool_call["name"]
    arguments: str  # str() of tool_call["args"]
    call_id: str  # tool_call["id"]

    class Config:
        # Reject unknown fields so structured-output parsing stays strict.
        extra = "forbid"
59
+
60
class ExecutionReport(BaseModel):
    """Structured report for critic evaluation.

    Produced by enhanced_finalizer and consumed by critic_evaluator and
    format_final_answer.
    """

    query_summary: str = Field(description="Brief summary of the user's query")
    approach_used: str = Field(description="What approach/strategy was used")
    tools_executed: List[ToolExecution] = Field(default_factory=list, description="List of tools used with results")
    key_findings: List[str] = Field(default_factory=list, description="Main findings or results")
    data_sources: List[str] = Field(default_factory=list, description="Sources of information used")
    assumptions_made: List[str] = Field(default_factory=list, description="Any assumptions made during execution")
    confidence_level: Literal["low", "medium", "high"] = Field(description="Confidence in the answer")
    limitations: List[str] = Field(default_factory=list, description="Known limitations or caveats")
    final_answer: str = Field(description="The actual answer to the user's query")

    class Config:
        # Reject unknown fields so structured-output parsing stays strict.
        extra = "forbid"
src/state.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import MessagesState
2
+ from typing import List, Annotated, Optional, Dict, Any
3
+ from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport
4
+ from typing import Sequence
5
+ from langchain_core.messages import BaseMessage
6
+ from langgraph.graph.message import add_messages
7
+
8
class AgentState(MessagesState):
    """Shared LangGraph state dict threaded through all workflow nodes."""
    query: str  # original user query
    final_answer: str  # answer produced at the end of the run
    plan: Optional[PlannerPlan]  # plan from the planner node, if one was produced
    complexity_assessment: ComplexityLevel  # judged difficulty of the query
    current_step: int  # index of the plan step currently being executed (presumably)
    reasoning_done: bool  # flag set once the reasoning phase is finished
    # NOTE(review): MessagesState already declares `messages` with the same
    # add_messages reducer; this redeclaration is redundant but harmless.
    messages : Annotated[Sequence[BaseMessage], add_messages]
    files: List[str]  # paths of user-provided input files
    file_contents: Dict[str, Any]  # per-file metadata (see tools.preprocess_files)
    critique_feedback: Optional[CritiqueFeedback]  # output of the critique node
    iteration_count :int  # completed critique/replan iterations so far
    max_iterations: int  # hard cap on critique/replan iterations
    execution_report : ExecutionReport  # structured report handed to the critic
+
src/tools/__init__.py ADDED
File without changes
src/tools/code_interpreter.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/gaia_agent/tools/safe_code_run.py
2
+ from __future__ import annotations
3
+ import io, os, sys, uuid, base64, traceback, contextlib, tempfile, shutil
4
+ import multiprocessing as mp
5
+ from typing import Optional, Dict, Any, List
6
+ from pydantic import ValidationError
7
+ from langchain_core.tools import tool
8
+ from utils.code_run import (
9
+ CodeRunRequest, CodeRunResult, EnvInfo,
10
+ PlotArtifact, DataFrameArtifact,
11
+ )
12
+
13
+ # ====================== HELPERS ======================
14
+
15
+ def _b64_png(fig, dpi: int) -> str:
16
+ import matplotlib.pyplot as plt
17
+ buf = io.BytesIO()
18
+ fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight")
19
+ buf.seek(0)
20
+ data = base64.b64encode(buf.read()).decode("utf-8")
21
+ buf.close()
22
+ return data
23
+
24
+ def _clip_df(df, max_rows: int, max_cols: int):
25
+ sub = df.iloc[:max_rows, :max_cols]
26
+ head = sub.to_dict(orient="records")
27
+ dtypes = {str(k): str(v) for k, v in sub.dtypes.to_dict().items()}
28
+ return head, list(df.shape), dtypes
29
+
30
def _env_info() -> EnvInfo:
    """Report installed numpy/pandas versions (None when not importable)."""
    found = {}
    for pkg in ("numpy", "pandas"):
        try:
            found[pkg] = __import__(pkg).__version__
        except Exception:
            found[pkg] = None
    return EnvInfo(numpy=found["numpy"], pandas=found["pandas"])
40
+
41
+ # ====================== CHILD PROCESS ======================
42
+
43
def _child_exec(payload: Dict[str, Any], queue: mp.Queue):
    """
    Isolated execution of user code in the spawned child process:
      - stripped-down builtins (no exec/eval/compile/input/...)
      - safe open() (read-only, confined to the sandbox dir)
      - import whitelist
      - network disabled
      - temp cwd + cleanup
      - RLIMIT CPU/AS/FSIZE (Unix only; best-effort elsewhere)
      - stdout/stderr capture
      - collection of matplotlib figures and pandas DataFrames (flag-driven)
    Pushes exactly one result dict onto ``queue`` when done.
    """
    import builtins, importlib

    code: str = payload["code"]
    limits: Dict[str, Any] = payload["limits"]
    allowed: List[str] = payload["allowed"]
    return_plots: bool = payload["return_plots"]
    return_dfs: bool = payload["return_dfs"]

    # ---------- OS limits (Unix) ----------
    try:
        import resource
        cpu = max(1, int(limits["timeout_seconds"]))
        resource.setrlimit(resource.RLIMIT_CPU, (cpu, cpu + 1))
        # soft RAM cap ~1.5GB (tune as needed)
        one_gb = 1024 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (int(1.5 * one_gb), int(1.5 * one_gb)))
        # cap the size of files the child may create
        resource.setrlimit(resource.RLIMIT_FSIZE, (50 * 1024 * 1024, 50 * 1024 * 1024))
    except Exception:
        pass  # resource module missing (e.g. Windows) -> run without rlimits

    # ---------- Sandbox FS ----------
    workdir = tempfile.mkdtemp(prefix="ci_")
    os.chdir(workdir)

    # ---------- Network ban ----------
    try:
        import socket
        class _NoNet(socket.socket):
            def __init__(self, *a, **kw):
                raise OSError("Network disabled in sandbox")
        socket.socket = _NoNet  # type: ignore
    except Exception:
        pass

    # ---------- Builtins ----------
    safe_names = [
        "abs","all","any","bool","dict","float","int","len","list","max","min",
        "range","str","sum","print","enumerate","zip","map","filter","sorted",
        "reversed","complex","pow","divmod"
    ]
    safe_builtins = {n: getattr(builtins, n) for n in safe_names}

    # keep a handle on the real open, then swap in a restricted one
    real_open = open

    def _safe_open(path, mode="r", *a, **kw):
        # Allow ONLY reading, ONLY inside workdir
        if any(m in mode for m in ("w", "a", "+", "x")):
            raise PermissionError("Write access forbidden in sandbox")
        abspath = os.path.abspath(path)
        # forbid escaping the sandbox (incl. symlinks resolving outside it)
        if not abspath.startswith(workdir + os.sep) and abspath != workdir:
            raise PermissionError("Access outside sandbox forbidden")
        # binary writes are already rejected by the mode check above
        return real_open(abspath, mode, *a, **kw)

    # drop dangerous builtins (no-op for names never whitelisted) and install our open
    for banned in ["exec","eval","__import__","compile","input","globals","locals","vars","dir","help","__build_class__"]:
        safe_builtins.pop(banned, None)
    safe_builtins["open"] = _safe_open

    # ---------- Import whitelist ----------
    real_import = builtins.__import__
    ALLOWED = set(allowed)
    def _safe_import(name, globals=None, locals=None, fromlist=(), level=0):
        base = name.split(".")[0]
        if (name not in ALLOWED) and (base not in ALLOWED):
            raise ImportError(f"Module '{name}' is not allowed")
        return real_import(name, globals, locals, fromlist, level)

    glb: Dict[str, Any] = {"__builtins__": safe_builtins}
    lcl: Dict[str, Any] = {}

    # ---------- Matplotlib headless ----------
    plt = None
    if return_plots:
        try:
            import matplotlib
            matplotlib.use("Agg")
            import matplotlib.pyplot as _plt
            plt = _plt
        except Exception:
            plt = None

    # ---------- Preload whitelisted mods ----------
    # NOTE: preloading intentionally happens BEFORE _safe_import is installed,
    # so it uses the real import machinery; user code only ever sees the
    # guarded __import__ below.
    preloads = [
        "math","random","statistics","datetime","re","json","fractions","decimal",
        "numpy","pandas","cmath",
        "matplotlib","matplotlib.pyplot"
    ]
    for mod in preloads:
        try:
            if (mod in ALLOWED) or (mod.split(".")[0] in ALLOWED):
                glb[mod.split(".")[-1]] = importlib.import_module(mod)
        except Exception:
            pass

    # enable guarded imports for the user code
    safe_builtins["__import__"] = _safe_import

    # ---------- Execute ----------
    out_buf, err_buf = io.StringIO(), io.StringIO()
    status = "error"
    result_repr: Optional[str] = None
    plots: List[Dict[str, Any]] = []
    dataframes: List[Dict[str, Any]] = []

    try:
        with contextlib.redirect_stdout(out_buf), contextlib.redirect_stderr(err_buf):
            exec(code, glb, lcl)
        status = "success"

        # surface repr of the result if the code set `_` or `result`
        if "_" in lcl:
            result_repr = repr(lcl["_"])
        elif "result" in lcl:
            result_repr = repr(lcl["result"])

        # collect open matplotlib figures, capped at max_plots
        if plt is not None and return_plots:
            fig_nums = plt.get_fignums()[: int(limits["max_plots"])]
            for num in fig_nums:
                fig = plt.figure(num)
                b64 = _b64_png(fig, dpi=int(limits["plot_dpi"]))
                plots.append({"data_base64": b64, "format": "png"})
            plt.close("all")

        # collect DataFrames created by the user code, capped at max_dataframes
        if return_dfs:
            try:
                import pandas as _pd
                for name, val in list(lcl.items()):
                    if isinstance(val, _pd.DataFrame):
                        if len(dataframes) >= int(limits["max_dataframes"]):
                            break
                        head, shape, dtypes = _clip_df(
                            val,
                            max_rows=int(limits["max_df_rows"]),
                            max_cols=int(limits["max_df_cols"]),
                        )
                        dataframes.append({
                            "name": str(name),
                            "head": head,
                            "shape": shape,
                            "dtypes": dtypes,
                        })
            except Exception:
                pass

    except Exception:
        status = "error"
        print(traceback.format_exc(), file=err_buf)
    finally:
        try:
            shutil.rmtree(workdir, ignore_errors=True)
        except Exception:
            pass

    queue.put({
        "status": status,
        "stdout": out_buf.getvalue(),
        "stderr": err_buf.getvalue(),
        "result_repr": result_repr,
        "plots": plots,
        "dataframes": dataframes,
    })
222
+
223
+ # ====================== HOST PROCESS ======================
224
+
225
def run_python_in_subprocess(req: CodeRunRequest) -> CodeRunResult:
    """Run a CodeRunRequest in a spawned child process and collect the result.

    Uses the 'spawn' start method so the child gets a clean interpreter,
    joins with the request's timeout, and terminates the child on overrun.
    Returns a CodeRunResult whose status is 'timeout' when the deadline
    was hit, otherwise whatever the child reported ('success'/'error').
    """
    exec_id = str(uuid.uuid4())
    ctx = mp.get_context("spawn")
    q: mp.Queue = ctx.Queue()

    # plain-dict payload: everything sent to the child must be picklable
    payload = {
        "code": req.code,
        "limits": req.limits.model_dump(),
        "allowed": list(req.allowed_modules),
        "return_plots": bool(req.return_plots),
        "return_dfs": bool(req.return_dataframes),
    }

    p = ctx.Process(target=_child_exec, args=(payload, q), daemon=True)
    p.start()
    p.join(req.limits.timeout_seconds)

    # defaults describe the timeout case; overwritten when the child finished
    status = "timeout"
    stdout = ""
    stderr = "Timed out."
    result_repr = None
    plots: List[PlotArtifact] = []
    dataframes: List[DataFrameArtifact] = []

    if p.is_alive():
        # deadline exceeded: kill the worker, keep the timeout defaults
        p.terminate()
        p.join(1)
    else:
        try:
            msg = q.get_nowait()
            status = msg.get("status", "error")
            # clip captured streams to the configured limits
            stdout = (msg.get("stdout") or "")[: req.limits.max_stdout_chars]
            stderr = (msg.get("stderr") or "")[: req.limits.max_stderr_chars]
            result_repr = msg.get("result_repr")
            plots = [PlotArtifact(**p_) for p_ in msg.get("plots", [])]
            dataframes = [DataFrameArtifact(**d_) for d_ in msg.get("dataframes", [])]
        except Exception as e:
            # child exited without posting a result (crash / empty queue)
            status = "error"
            stderr = f"Worker crashed: {e}"

    return CodeRunResult(
        execution_id=exec_id,
        status=status,
        stdout=stdout,
        stderr=stderr,
        result_repr=result_repr,
        plots=plots,
        dataframes=dataframes,
        env=_env_info(),
    )
275
+
276
+ # ====================== LangChain TOOL ======================
277
+
278
@tool
def safe_code_run(code:str) -> str:
    """
    Safely execute Python code in an isolated subprocess with security restrictions.

    IMPORTANT - To see output, you MUST:
    - Use print() statements for output
    - Assign final result to variable 'result' or '_'
    - Save data to variables for DataFrame/plot capture

    Examples:
    ✅ Good:
        result = 2 + 2
        print(f"Answer: {result}")

    ✅ Good:
        import numpy as np
        arr = np.array([1, 2, 3])
        print(arr.mean())

    ✅ Good:
        import pandas as pd
        df = pd.DataFrame({'x': [1, 2], 'y': [3, 4]})
        print(df)
        result = df.sum()

    ❌ Bad (no output):
        2 + 2  # This won't show anything

    Security features:
    - Whitelisted imports only (numpy, pandas, matplotlib, etc.)
    - Read-only file access within sandbox
    - Network disabled
    - Memory/CPU limits
    - Timeout protection

    Returns JSON with: status, stdout, stderr, result_repr, plots, dataframes, env info
    """
    # FIX: build the request object directly. The previous version serialized
    # the CodeRunRequest to JSON and immediately re-parsed it
    # (model_dump_json -> model_validate_json), a pure round-trip that added
    # nothing but overhead and an extra failure mode.
    req = CodeRunRequest(
        code=code,
        # generous timeout headroom for the first run
        limits=dict(timeout_seconds=35),
    )
    return run_python_in_subprocess(req).model_dump_json()
src/tools/tools.py ADDED
@@ -0,0 +1,883 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import json
5
+ import base64
6
+ import tldextract
7
+ import tempfile
8
+ from urllib.parse import urlparse
9
+ import io
10
+ import pandas as pd
11
+ from typing import List, Optional, Dict, Any
12
+ from datetime import datetime
13
+ from PIL import Image, ImageStat, ExifTags
14
+ import google.generativeai as genai
15
+ from langchain_core.tools import tool
16
+ from langchain_community.tools.tavily_search import TavilySearchResults
17
+ from langchain_community.document_loaders import ArxivLoader
18
+ from langchain_community.document_loaders import WikipediaLoader
19
+ from PIL import ImageDraw, ImageFont, ImageEnhance, ImageFilter
20
+ from utils.image_processing import *
21
+
22
def _exif_dict(img: Image.Image) -> dict:
    """Extract EXIF data as {tag_name: value}; best-effort, {} on any failure."""
    try:
        tags = {}
        for key, value in (img._getexif() or {}).items():
            name = ExifTags.TAGS.get(key, str(key))
            tags[name] = value if isinstance(value, (int, float, str)) else str(value)
        return tags
    except Exception:
        return {}
32
+
33
+ def _clip(text: str | None, n: int) -> str:
34
+ """Утилита: безопасно обрезаем длинные сниппеты."""
35
+ if not text:
36
+ return ""
37
+ text = text.strip()
38
+ return (text[: n - 1] + "…") if len(text) > n else text
39
+
40
+
41
+ def _parse_dt(v) -> Optional[str]:
42
+ """[ИЗМЕНЕНИЕ] Приводим даты к ISO-строке, если возможно."""
43
+ try:
44
+ if isinstance(v, datetime):
45
+ return v.isoformat()
46
+ if isinstance(v, str) and v:
47
+
48
+ return v
49
+ except Exception:
50
+ pass
51
+ return None
52
+
53
+ def _read_text_best_effort(path: str, max_chars: int) -> tuple[str, str]:
54
+ # пробуем utf-8 → fallback latin-1 (без chardet)
55
+ try:
56
+ with open(path, "r", encoding="utf-8") as f:
57
+ s = f.read()
58
+ return s[:max_chars], "utf-8"
59
+ except Exception:
60
+ with open(path, "r", encoding="latin-1", errors="replace") as f:
61
+ s = f.read()
62
+ return s[:max_chars], "latin-1"
63
+
64
+ # ИСПРАВЛЕНИЕ 3: Улучшить preprocess_files с более точным определением типов
65
def preprocess_files(files: List[str]) -> Dict[str, Dict[str, Any]]:
    """Inspect input files and return per-file metadata.

    For each existing path, maps the path to a dict:
      {path, extension, size, type, suggested_tool, preview}
    - ``suggested_tool`` is a hint for the reasoning step about which
      analyze_* tool fits the file.
    - ``preview`` is filled only for small (< 1000 bytes) .txt files.
    Missing paths are skipped with a printed warning.

    FIX: the image hint contained garbled English ("if its aboutrt content
    or qa"); since this string is fed to the LLM as tool-selection guidance,
    the typo could degrade routing. The ad-hoc if/elif ladder is also
    replaced by a dispatch table.
    """
    file_info: Dict[str, Dict[str, Any]] = {}

    image_hint = (
        "if it's about the image itself: analyze_image_file, "
        "if it's about content or qa: vision_qa_gemma ONLY"
    )
    # extension -> (type, suggested tool) lookup table
    dispatch: Dict[str, tuple] = {
        ".csv": ("table", "analyze_csv_file"),
        ".xlsx": ("excel", "analyze_excel_file"),
        ".xls": ("excel", "analyze_excel_file"),
        ".pdf": ("document", "analyze_pdf_file"),
        ".docx": ("document", "analyze_docx_file"),
        ".doc": ("document", "analyze_docx_file"),
        ".txt": ("text", "analyze_txt_file"),
        ".md": ("text", "analyze_txt_file"),
        ".jpg": ("image", image_hint),
        ".jpeg": ("image", image_hint),
        ".png": ("image", image_hint),
        ".gif": ("image", image_hint),
        ".bmp": ("image", image_hint),
    }

    for file_path in files:
        if not os.path.exists(file_path):
            print(f"Warning: File {file_path} not found")
            continue

        file_ext = os.path.splitext(file_path)[1].lower()
        file_size = os.path.getsize(file_path)
        ftype, tool_hint = dispatch.get(file_ext, ("unknown", "analyze_txt_file (fallback)"))

        info: Dict[str, Any] = {
            "path": file_path,
            "extension": file_ext,
            "size": file_size,
            "type": ftype,
            "suggested_tool": tool_hint,
            "preview": None,
        }

        # Safe preview for small text files only
        if file_ext == '.txt' and file_size < 1000:
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                info["preview"] = content[:200] + "..." if len(content) > 200 else content
            except Exception as e:
                info["preview"] = f"Error reading file: {e}"

        file_info[file_path] = info

    return file_info
121
+
122
+ #----------------------------------------------WEB BROWSING TOOLS------------------------------------------------#
123
+
124
+
125
+ #WIKIPEDIA SEARCH TOOL
126
+
127
@tool
def wiki_search(
    query: str,
    max_results: int = 3,
    language: str = "en",
    content_chars_max: int = 5000,
    snippet_chars: int = 400,
) -> str:
    """
    Search Wikipedia using LangChain's WikipediaLoader.
    Returns a JSON string:
    {
      "query": "...",
      "language": "en",
      "items": [
        {
          "url": "https://en.wikipedia.org/wiki/...",
          "title": "Title",
          "snippet": "First N chars of page content",
          "page_content": "...(clipped to content_chars_max)..."
        }
      ]
    }
    On failure: {"error": "...", "query": "...", "language": "..."}
    """
    try:
        loader = WikipediaLoader(
            query=query,
            load_max_docs=max_results,
            lang=language,
            doc_content_chars_max=content_chars_max,
        )

        results: List[dict] = []
        visited_urls = set()

        for doc in loader.load():
            source_url = doc.metadata.get("source") or ""
            # drop results without a URL as well as duplicate URLs
            if not source_url or source_url in visited_urls:
                continue
            visited_urls.add(source_url)

            body = doc.page_content or ""
            results.append(
                {
                    "url": source_url,
                    "title": doc.metadata.get("title") or "",
                    "snippet": _clip(body, snippet_chars),
                    "page_content": body,  # already capped by doc_content_chars_max
                }
            )

        return json.dumps({"query": query, "language": language, "items": results})

    except Exception as e:
        return json.dumps({"error": str(e), "query": query, "language": language})
190
+
191
+ #TAVILY WEB SEARCH TOOL
192
+
193
def _domain(url: str) -> str:
    """Reduce a URL to its registrable domain 'site.tld' (subdomains dropped)."""
    parts = tldextract.extract(url)
    return ".".join(piece for piece in (parts.domain, parts.suffix) if piece)
197
+
198
@tool
def web_search(
    query: str,
    max_results: int = 5,
    unique_domains: int = 5,
    snippet_chars: int = 400,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
) -> str:
    """
    Structured web search via Tavily.

    Args:
        query: search query.
        max_results: max number of results returned after filtering.
        unique_domains: when > 0, keep at most one result per domain
            (domain-diversity filter); 0 disables the filter.
        snippet_chars: max snippet length.
        include_domains: whitelist of domains ('site.tld').
        exclude_domains: blacklist of domains.

    Returns a JSON string:
    {
      "query": "...",
      "provider": "tavily",
      "items": [
        {
          "url": "...",
          "title": "...",
          "snippet": "...",
          "published": "2024-05-01T10:00:00Z",  # if Tavily provided it
          "source": "example.com"               # registrable domain
        }
      ]
    }
    On failure: {"error": "...", "query": "...", "provider": "tavily"}
    """
    try:
        raw_results = TavilySearchResults(max_results=max_results).invoke(query)

        items: List[dict] = []
        seen_urls: set[str] = set()
        seen_domains: set[str] = set()

        inc = set(include_domains or [])  # domain whitelist
        exc = set(exclude_domains or [])  # domain blacklist

        for r in raw_results:
            url = (r.get("url") or "").strip()
            if not url or url in seen_urls:
                continue

            dom = _domain(url)

            if inc and dom not in inc:
                continue
            if dom in exc:
                continue

            # BUGFIX: the previous version noted a repeated domain with a
            # bare `pass` and then appended the result anyway, so the
            # domain-diversity filter never filtered anything. A second hit
            # on an already-used domain is now actually skipped.
            if unique_domains > 0 and dom in seen_domains:
                continue
            seen_domains.add(dom)

            content = r.get("content") or r.get("snippet") or ""
            items.append(
                {
                    "url": url,
                    "title": (r.get("title") or "").strip(),
                    "snippet": _clip(content, snippet_chars),
                    "published": r.get("published_date") or r.get("created_at"),
                    "source": dom,  # explicit domain, handy for formatter/critic
                }
            )
            seen_urls.add(url)

            # enough results collected after filtering
            if len(items) >= max_results:
                break

        return json.dumps({"query": query, "provider": "tavily", "items": items})

    except Exception as e:
        # single JSON error format — easy to log and handle in the agent
        return json.dumps({"error": str(e), "query": query, "provider": "tavily"})
292
+
293
+
294
+ #ARXIV SEARCH TOOL
295
+
296
@tool
def arxiv_search(
    query: str,
    max_results: int = 5,
) -> str:
    """
    Search arXiv via LangChain's ArxivLoader.

    Returns a strict JSON string:
    {
      "query": "...",
      "provider": "arxiv",
      "items": [
        {
          "title": "...",
          "authors": ["A. Author", "B. Author"],
          "published": "YYYY-MM-DDTHH:MM:SS",
          "snippet": "first 400 chars of the summary",
          "summary": "... (may be clipped by ArxivLoader defaults)"
        }
      ]
    }
    On failure: {"error": "...", "query": "...", "provider": "arxiv"}

    FIX: the old docstring promised snippet/journal_ref/comment fields that
    the items never contained. The docstring now matches the payload, and a
    real "snippet" field is emitted (backward-compatible addition).
    """
    try:
        docs = ArxivLoader(
            query=query,
            load_max_docs=max_results,
        ).load()

        items: List[dict] = []

        for d in docs:
            md = d.metadata or {}

            title = md.get("Title") or md.get("title") or ""
            authors = md.get("Authors") or md.get("authors") or []
            if isinstance(authors, str):
                # loader returns a comma-separated string; normalize to a list
                authors = [a.strip() for a in authors.split(",") if a.strip()]

            published = _parse_dt(md.get("Published") or md.get("published"))
            summary = d.page_content or ""

            items.append(
                {
                    "title": title,
                    "authors": authors,
                    "published": published,
                    "snippet": _clip(summary, 400),
                    "summary": summary,
                }
            )

            if len(items) >= max_results:
                break

        return json.dumps({"query": query, "provider": "arxiv", "items": items})

    except Exception as e:
        return json.dumps({"error": str(e), "query": query, "provider": "arxiv"})
361
+
362
+
363
+
364
+ #----------------------------------------------MATH TOOLS------------------------------------------------#
365
+
366
@tool
def add(a: float, b: float) -> float:
    """Returns the sum of two numbers.
    Example: add(2, 3) -> 5
    """
    total = a + b
    return total
372
+
373
@tool
def subtract(a: float, b: float) -> float:
    """Returns the difference of two numbers.
    Example: subtract(5, 3) -> 2
    """
    difference = a - b
    return difference
379
+
380
@tool
def multiply(a: float, b: float) -> float:
    """Returns the product of two numbers.
    Example: multiply(2, 3) -> 6
    """
    product = a * b
    return product
386
+
387
@tool
def divide(a: float, b: float) -> float:
    """Returns the quotient of two numbers.
    Example: divide(6, 3) -> 2
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    quotient = a / b
    return quotient
395
+
396
@tool
def power(a: float, b: float) -> float:
    """Returns a raised to the power of b.
    Example: power(2, 3) -> 8
    """
    exponentiated = a ** b
    return exponentiated
402
+
403
+
404
+ #----------------------------------------------FILE PROCESSING TOOLS------------------------------------------------#
405
+
406
@tool
def analyze_csv_file(file_path: str, preview_rows: int = 20) -> str:
    """
    Analyze a CSV file: returns JSON with {kind, path, shape, columns, head, numeric_summary}.
    - preview_rows: number of rows for preview (head)
    """
    if not os.path.exists(file_path):
        return json.dumps({"error": "file not found", "path": file_path})
    try:
        frame = pd.read_csv(file_path)
        return json.dumps({
            "kind": "csv",
            "path": file_path,
            "shape": list(frame.shape),
            "columns": [str(c) for c in frame.columns],
            "head": frame.head(preview_rows).to_dict(orient="records"),
            # {col: {count, mean, std, ...}} over numeric columns only
            "numeric_summary": frame.select_dtypes("number").describe().to_dict(),
        })
    except Exception as e:
        return json.dumps({"error": str(e), "path": file_path})
429
+
430
@tool
def analyze_excel_file(file_path: str, sheet: int | str | None = None, preview_rows: int = 20, list_sheets: bool = True) -> str:
    """
    Analyze an Excel file: {kind, path, sheets?, active_sheet, shape, columns, head}.
    - sheet: sheet index or name (None -> first sheet)
    - list_sheets: include all sheet names
    """
    if not os.path.exists(file_path):
        return json.dumps({"error": "file not found", "path": file_path})
    try:
        workbook = pd.ExcelFile(file_path)
        chosen = 0 if sheet is None else sheet
        frame = pd.read_excel(workbook, sheet_name=chosen)
        report = {
            "kind": "excel",
            "path": file_path,
            "active_sheet": chosen if isinstance(chosen, int) else str(chosen),
            "shape": list(frame.shape),
            "columns": [str(c) for c in frame.columns],
            "head": frame.head(preview_rows).to_dict(orient="records"),
        }
        if list_sheets:
            report["sheets"] = [str(s) for s in workbook.sheet_names]
        return json.dumps(report)
    except Exception as e:
        return json.dumps({"error": str(e), "path": file_path})
457
+
458
@tool
def analyze_docx_file(file_path: str, max_chars: int = 20000, join_with: str = "\n") -> str:
    """
    Extract text from DOCX: {kind, path, paragraphs, text[:max_chars]}.
    """
    if not os.path.exists(file_path):
        return json.dumps({"error": "file not found", "path": file_path})
    try:
        from docx import Document  # pip install python-docx
    except Exception as e:
        return json.dumps({"error": f"python-docx not installed: {e}"})
    try:
        parsed = Document(file_path)
        texts = [para.text for para in parsed.paragraphs if para.text is not None]
        full_text = join_with.join(texts)
        return json.dumps({
            "kind": "docx",
            "path": file_path,
            "paragraphs": len(texts),
            "text": full_text[:max_chars],
            "length": len(full_text),
        })
    except Exception as e:
        return json.dumps({"error": str(e), "path": file_path})
483
+
484
+
485
@tool
def analyze_txt_file(file_path: str, max_chars: int = 20000) -> str:
    """
    Read plain text: {kind, path, encoding_guess, text[:max_chars], length}.
    """
    if not os.path.exists(file_path):
        return json.dumps({"error": "file not found", "path": file_path})
    try:
        body, encoding = _read_text_best_effort(file_path, max_chars=max_chars)
        return json.dumps({
            "kind": "txt",
            "path": file_path,
            "encoding_guess": encoding,
            "text": body,
            "length": os.path.getsize(file_path),  # NOTE: byte size on disk, not decoded char count
        })
    except Exception as e:
        return json.dumps({"error": str(e), "path": file_path})
504
+
505
@tool
def analyze_pdf_file(file_path: str, max_chars: int = 20000) -> str:
    """
    Extract text & page count from PDF: {kind, path, pages, text[:max_chars]}.
    Uses pdfminer.six for text and page counting.
    """
    if not os.path.exists(file_path):
        return json.dumps({"error": "file not found", "path": file_path})
    try:
        from pdfminer.high_level import extract_text
        from pdfminer.pdfpage import PDFPage

        full_text = extract_text(file_path) or ""
        with open(file_path, "rb") as fh:
            page_count = sum(1 for _ in PDFPage.get_pages(fh))
        return json.dumps({
            "kind": "pdf",
            "path": file_path,
            "pages": page_count,
            "text": full_text[:max_chars],
            "length": len(full_text),
        })
    except Exception as e:
        return json.dumps({"error": str(e), "path": file_path})
531
+
532
+
533
+ #----------------------------------------------IMAGE PROCESSING TOOLS------------------------------------------------#
534
+
535
@tool
def analyze_image_file(file_path: str, ocr: bool = False, lang: Optional[str] = None, max_ocr_chars: int = 10000) -> str:
    """
    Analyze image: {kind, path, format, mode, size, mean_brightness, exif?, ocr_text?}.
    - ocr: optional Tesseract OCR (pip install pytesseract + tesseract)
    """
    if not os.path.exists(file_path):
        return json.dumps({"error": "file not found", "path": file_path})
    try:
        img = Image.open(file_path)
        # mean luminance of the greyscale version, range 0..255
        mean_brightness = float(ImageStat.Stat(img.convert("L")).mean[0])
        result = {
            "kind": "image",
            "path": file_path,
            "format": img.format,
            "mode": img.mode,
            "size": list(img.size),  # [width, height]
            "mean_brightness": mean_brightness,
        }

        exif_data = _exif_dict(img)
        if exif_data:
            result["exif"] = exif_data

        if ocr:
            try:
                import pytesseract
                kwargs = {"lang": lang} if lang else {}
                text = pytesseract.image_to_string(img, **kwargs) or ""
                result["ocr_text"] = text[:max_ocr_chars]
                result["ocr_length"] = len(text)
            except Exception as e:
                result["ocr_error"] = str(e)

        return json.dumps(result)
    except Exception as e:
        return json.dumps({"error": str(e), "path": file_path})
574
+
575
+
576
+
577
+
578
+ # ------------------------- helpers for QA image TOOL -------------------------
579
+
580
def _configure():
    """Configure the google-generativeai SDK from environment variables.

    Raises:
        RuntimeError: when neither GOOGLE_API_KEY nor GENAI_API_KEY is set.
    """
    key = os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY")
    if not key:
        raise RuntimeError("Missing GOOGLE_API_KEY (or GENAI_API_KEY) in environment")
    genai.configure(api_key=key)
585
+
586
+ def _image_bytes_to_part(img_bytes: bytes, mime: str = "image/png") -> Dict[str, Any]:
587
+ # формат, который понимает genai.generate_content
588
+ return {"mime_type": mime, "data": base64.b64encode(img_bytes).decode("utf-8")}
589
+
590
def _ensure_png_bytes(img: Image.Image, max_pixels: int = 25_000_000) -> bytes:
    """Re-encode an image as PNG bytes, softly downscaling oversized inputs.

    Images above max_pixels are resized first — a guard against "image bombs".
    PNG output keeps the hand-off to the SDK reliable.
    """
    width, height = img.size
    total = width * height
    if total > max_pixels:
        factor = (max_pixels / total) ** 0.5
        new_size = (max(1, int(width * factor)), max(1, int(height * factor)))
        img = img.resize(new_size, Image.LANCZOS)

    out = io.BytesIO()
    img.save(out, format="PNG", optimize=True)
    return out.getvalue()
602
+
603
def _load_image_as_png_bytes_from_path(path: str) -> bytes:
    """Open an image file and return it normalized to PNG bytes.

    Raises:
        FileNotFoundError: when the path does not exist.
    """
    if os.path.exists(path):
        return _ensure_png_bytes(Image.open(path))
    raise FileNotFoundError(f"Image not found: {path}")
608
+
609
def _load_image_as_png_bytes_from_b64(b64: str) -> bytes:
    """Decode a base64 payload into an image and return it as PNG bytes."""
    decoded = base64.b64decode(b64, validate=True)
    return _ensure_png_bytes(Image.open(io.BytesIO(decoded)))
613
+
614
+ def _clean_json_text(s: str) -> str:
615
+ # вычищаем обёртки ```json ... ``` и забираем объект { ... }
616
+ s = s.strip()
617
+ if s.startswith("```"):
618
+ s = s.strip("`").replace("json", "", 1).strip()
619
+ # вырезать по внешним фигурным скобкам
620
+ start = s.find("{")
621
+ end = s.rfind("}")
622
+ if start != -1 and end != -1 and end > start:
623
+ return s[start:end+1]
624
+ return s
625
+
626
# Instruction prepended to every single-image QA request: forces a strict,
# image-grounded answer returned as JSON of shape {"answer": string}.
_SINGLE_IMAGE_QA_PROMPT = (
    "You will be given ONE image and a user question about it.\n"
    "Answer STRICTLY and CONCISELY based only on the image content.\n"
    "If the image does not contain enough information to answer, reply 'not enough information'.\n"
    "If the answer is numeric, include units if visible.\n"
    "Return ONLY valid JSON with the schema:\n"
    "{\"answer\": string}\n"
)
634
+
635
def _call_model(parts: List[Any], temperature: float) -> Dict[str, Any]:
    """Send content parts to the GenAI model and parse the reply as JSON.

    The model name comes from the GENAI_MODEL env var (see .env.example),
    falling back to "gemma-3-27b-it". If the first reply is not valid JSON,
    the same model is asked once more to reformat its own output.

    Args:
        parts: list of content parts ({"text": ...} / inline image dicts).
        temperature: decoding temperature forwarded via generation_config.

    Raises:
        Exception: propagated from the SDK, or a JSON decode error when even
            the reformatted reply is not valid JSON.
    """
    model_name = os.getenv("GENAI_MODEL", "gemma-3-27b-it")
    model = genai.GenerativeModel(model_name)
    resp = model.generate_content(parts, generation_config={"temperature": temperature})
    text = (resp.text or "").strip()

    # First attempt: parse the reply directly.
    try:
        return json.loads(_clean_json_text(text))
    except Exception:
        # Second chance: ask the model to convert its own answer to strict JSON.
        fix_prompt = (
            "Convert the following text into STRICT valid JSON matching schema {\"answer\": string}. "
            "Return ONLY JSON, no extra text:\n" + text
        )
        fix_resp = model.generate_content([{"text": fix_prompt}])
        return json.loads(_clean_json_text((fix_resp.text or "").strip()))
653
+
654
+ # --------------------------- TOOL ---------------------------
655
+
656
@tool
def vision_qa_gemma(
    question: str,
    image_path: Optional[str] = None,
    image_base64: Optional[str] = None,
    temperature: float = 0.2,
) -> str:
    """
    Vision QA with Google GenAI (Gemma/Gemini). Returns JSON: {"answer": "..."}.

    Args:
        question: user question about the image.
        image_path: local file path to the image (PNG/JPG/...).
        image_base64: base64-encoded image (if no path).
        temperature: decoding temperature (default 0.2).

    Exactly ONE of (image_path, image_base64) must be provided.
    """
    import json as _json
    try:
        _configure()
        # Exactly one image source is allowed: both set, or both empty, is an error.
        if bool(image_path) == bool(image_base64):
            return _json.dumps({"error": "Provide exactly ONE of image_path or image_base64"})

        if image_path:
            png_bytes = _load_image_as_png_bytes_from_path(image_path)
        else:
            png_bytes = _load_image_as_png_bytes_from_b64(image_base64)

        prompt_part = {"text": _SINGLE_IMAGE_QA_PROMPT + "\nQuestion: " + question.strip()}
        reply = _call_model([prompt_part, _image_bytes_to_part(png_bytes, "image/png")], temperature)

        # Final guard: surface only a string "answer" field to the caller.
        answer = reply.get("answer") if isinstance(reply, dict) else None
        if answer is None:
            answer = "not enough information"
        elif not isinstance(answer, str):
            answer = str(answer)

        return _json.dumps({"answer": answer})

    except Exception as e:
        return _json.dumps({"error": str(e)})
702
+
703
+
704
+ #-------------------------------------------------------------- ADDITIONAL TOOLS -------------------------------------------------------------#
705
@tool
def draw_on_image(
    image_base64: str, drawing_type: str, params: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Draw shapes (rectangle, circle, line) or text onto an image.

    Args:
        image_base64 (str): Base64 encoded input image.
        drawing_type (str): One of "rectangle", "circle", "line", "text".
        params (Dict[str, Any]): Drawing parameters. Shared keys: "color"
            (default "red") and "width" (default 2). Per type:
            - rectangle: left, top, right, bottom
            - circle: x, y, radius (center point plus radius)
            - line: start_x, start_y, end_x, end_y
            - text: x, y, text (default "Text"), font_size (default 20)

    Returns:
        Dictionary with {"result_image": <base64 PNG>} on success, or
        {"error": <message>} on unknown type / missing params / decode failure.
    """
    try:
        img = decode_image(image_base64)
        draw = ImageDraw.Draw(img)
        color = params.get("color", "red")

        if drawing_type == "rectangle":
            draw.rectangle(
                [params["left"], params["top"], params["right"], params["bottom"]],
                outline=color,
                width=params.get("width", 2),
            )
        elif drawing_type == "circle":
            # Convert center + radius into the bounding box PIL's ellipse expects.
            x, y, r = params["x"], params["y"], params["radius"]
            draw.ellipse(
                (x - r, y - r, x + r, y + r),
                outline=color,
                width=params.get("width", 2),
            )
        elif drawing_type == "line":
            draw.line(
                (
                    params["start_x"],
                    params["start_y"],
                    params["end_x"],
                    params["end_y"],
                ),
                fill=color,
                width=params.get("width", 2),
            )
        elif drawing_type == "text":
            font_size = params.get("font_size", 20)
            try:
                font = ImageFont.truetype("arial.ttf", font_size)
            except IOError:
                # Arial may not be installed; PIL's builtin font ignores font_size.
                font = ImageFont.load_default()
            draw.text(
                (params["x"], params["y"]),
                params.get("text", "Text"),
                fill=color,
                font=font,
            )
        else:
            return {"error": f"Unknown drawing type: {drawing_type}"}

        # Round-trip through disk: save_image writes a PNG, encode_image re-reads it.
        result_path = save_image(img)
        result_base64 = encode_image(result_path)
        return {"result_image": result_base64}

    except Exception as e:
        return {"error": str(e)}
768
+
769
@tool
def transform_image(
    image_base64: str, operation: str, params: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Apply transformations: resize, rotate, crop, flip, brightness, contrast, blur, sharpen, grayscale.

    Args:
        image_base64 (str): Base64 encoded input image.
        operation (str): One of "resize", "rotate", "crop", "flip",
            "adjust_brightness", "adjust_contrast", "blur", "sharpen", "grayscale".
        params (Dict[str, Any], optional): Parameters for the operation:
            - resize: width, height (defaults: half the current dimensions)
            - rotate: angle in degrees (default 90; canvas expanded to fit)
            - crop: left, top, right, bottom (defaults: the full image)
            - flip: direction "horizontal" (default) or anything else for vertical
            - adjust_brightness / adjust_contrast: factor (default 1.5)
            - blur: radius (default 2)
            - sharpen / grayscale: no parameters

    Returns:
        Dictionary with {"transformed_image": <base64 PNG>} on success, or
        {"error": <message>} on unknown operation / bad input.
    """
    try:
        img = decode_image(image_base64)
        params = params or {}

        if operation == "resize":
            img = img.resize(
                (
                    params.get("width", img.width // 2),
                    params.get("height", img.height // 2),
                )
            )
        elif operation == "rotate":
            img = img.rotate(params.get("angle", 90), expand=True)
        elif operation == "crop":
            img = img.crop(
                (
                    params.get("left", 0),
                    params.get("top", 0),
                    params.get("right", img.width),
                    params.get("bottom", img.height),
                )
            )
        elif operation == "flip":
            if params.get("direction", "horizontal") == "horizontal":
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
            else:
                img = img.transpose(Image.FLIP_TOP_BOTTOM)
        elif operation == "adjust_brightness":
            img = ImageEnhance.Brightness(img).enhance(params.get("factor", 1.5))
        elif operation == "adjust_contrast":
            img = ImageEnhance.Contrast(img).enhance(params.get("factor", 1.5))
        elif operation == "blur":
            img = img.filter(ImageFilter.GaussianBlur(params.get("radius", 2)))
        elif operation == "sharpen":
            img = img.filter(ImageFilter.SHARPEN)
        elif operation == "grayscale":
            img = img.convert("L")
        else:
            return {"error": f"Unknown operation: {operation}"}

        # Persist to disk and re-encode so the caller receives base64 PNG data.
        result_path = save_image(img)
        result_base64 = encode_image(result_path)
        return {"transformed_image": result_base64}

    except Exception as e:
        return {"error": str(e)}
828
+
829
@tool
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a file and return the path.

    Args:
        content (str): the content to save to the file.
        filename (str, optional): the name of the file. If not provided, a
            random name file will be created.

    Returns:
        A human-readable message containing the path of the saved file.
    """
    temp_dir = tempfile.gettempdir()
    if filename is None:
        # mkstemp + close instead of an open NamedTemporaryFile: the old code
        # reopened the path while the tempfile handle was still open, which
        # leaks the handle and fails outright on Windows.
        fd, filepath = tempfile.mkstemp(dir=temp_dir)
        os.close(fd)
    else:
        filepath = os.path.join(temp_dir, filename)

    # Explicit UTF-8 keeps output independent of the platform's locale encoding.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."
848
+
849
+
850
+ import requests
851
+
852
@tool
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it to a temporary location.

    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. If not provided, the
            name is derived from the URL path or randomly generated.

    Returns:
        A message with the local file path on success, or an error description.
    """
    try:
        # Derive a filename from the URL path when none was given.
        if not filename:
            filename = os.path.basename(urlparse(url).path)
            if not filename:
                filename = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), filename)

        # timeout prevents the agent from hanging forever on a dead host; the
        # with-block closes the streamed connection deterministically.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        return f"Error downloading file: {str(e)}"
src/utils/__init__.py ADDED
File without changes
src/utils/code_run.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Literal, Dict, Any
2
+ from pydantic import BaseModel, Field
3
+ import platform, sys
4
+
5
class Limits(BaseModel):
    """Resource caps applied to one sandboxed code run (output, artifacts, time)."""

    timeout_seconds: int = Field(12, ge=1, le=120)
    max_stdout_chars: int = Field(10000, ge=256, le=200_000)
    max_stderr_chars: int = Field(10000, ge=256, le=200_000)
    max_plots: int = Field(4, ge=0, le=10)
    max_dataframes: int = Field(3, ge=0, le=10)
    max_df_rows: int = Field(20, ge=1, le=200)
    max_df_cols: int = Field(20, ge=1, le=200)
    plot_dpi: int = Field(120, ge=72, le=300)
    max_pixels: int = Field(25_000_000, ge=1)  # in case the user generates huge images
+
16
class CodeRunRequest(BaseModel):
    """Request payload describing one sandboxed code execution."""

    language: Literal["python"] = "python"
    code: str
    # Explicit allowlist of importable modules (top-level names)
    allowed_modules: List[str] = Field(
        default_factory=lambda: [
            "math","random","statistics","datetime","re","json","fractions","decimal",
            "numpy","pandas","cmath","matplotlib","matplotlib.pyplot", "seaborn","sklearn","sklearn.datasets","sklearn.model_selection", "sympy"
        ]
    )
    # Flags controlling which artifacts to return
    return_plots: bool = True
    return_dataframes: bool = True
    # Resource limits for the run
    limits: Limits = Field(default_factory=Limits)
31
+
32
class PlotArtifact(BaseModel):
    """A plot captured from a run, encoded as base64 PNG data."""

    data_base64: str
    format: Literal["png"] = "png"
35
+
36
class DataFrameArtifact(BaseModel):
    """Compact summary of a DataFrame produced by a run."""

    name: str
    head: List[Dict[str, Any]]  # leading rows serialized as records
    shape: List[int]  # DataFrame .shape as a list — presumably [rows, cols]
    dtypes: Dict[str, str]  # column name -> dtype string
41
+
42
class EnvInfo(BaseModel):
    """Interpreter and library versions of the execution environment."""

    python: str = Field(default_factory=lambda: sys.version.split()[0])
    numpy: Optional[str] = None  # None when the version was not detected
    pandas: Optional[str] = None  # None when the version was not detected
    platform: str = Field(default_factory=platform.platform)
47
+
48
class CodeRunResult(BaseModel):
    """Outcome of one sandboxed code execution."""

    execution_id: str
    status: Literal["success","error","timeout"]
    stdout: str = ""
    stderr: str = ""
    result_repr: Optional[str] = None  # textual repr of the run's result, if any
    plots: List[PlotArtifact] = Field(default_factory=list)
    dataframes: List[DataFrameArtifact] = Field(default_factory=list)
    env: EnvInfo
src/utils/image_processing.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import base64
4
+ import uuid
5
+ from PIL import Image
6
+
7
+ # Helper functions for image processing
8
def encode_image(image_path: str) -> str:
    """Read an image file from disk and return its contents base64-encoded."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
12
+
13
+
14
def decode_image(base64_string: str) -> Image.Image:
    """Decode a base64 string into an in-memory PIL Image."""
    raw = base64.b64decode(base64_string)
    buffer = io.BytesIO(raw)
    return Image.open(buffer)
18
+
19
+
20
def save_image(image: Image.Image, directory: str = "image_outputs") -> str:
    """Save a PIL Image to disk under a random PNG name and return its path."""
    os.makedirs(directory, exist_ok=True)
    destination = os.path.join(directory, f"{uuid.uuid4()}.png")
    image.save(destination)
    return destination
src/utils/utils.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
2
+ from schemas import ComplexityLevel, ExecutionReport
3
+ from prompts.prompts import COMPLEXITY_ASSESSOR_PROMPT
4
+ from config import llm
5
+ from state import AgentState
6
+
7
def clean_message_history(messages):
    """
    Clean the message history of incomplete tool_calls/response cycles.

    Removes an AIMessage carrying tool_calls when the matching ToolMessage
    responses are not all present immediately after it.
    """
    cleaned_messages = []
    i = 0

    while i < len(messages):
        msg = messages[i]

        # AIMessage that requested tool calls
        if hasattr(msg, 'tool_calls') and msg.tool_calls:
            # Collect the ids the assistant asked for ...
            tool_call_ids = {tc['id'] for tc in msg.tool_calls}
            found_responses = set()

            # ... and scan the directly following ToolMessages for responses.
            j = i + 1
            while j < len(messages) and isinstance(messages[j], ToolMessage):
                if messages[j].tool_call_id in tool_call_ids:
                    found_responses.add(messages[j].tool_call_id)
                j += 1

            # Keep the whole block only when every tool call got a response.
            if found_responses == tool_call_ids:
                # Append the AIMessage and all its matching ToolMessages.
                cleaned_messages.append(msg)
                for k in range(i + 1, j):
                    cleaned_messages.append(messages[k])
                i = j
            else:
                # Drop the incomplete block entirely.
                print(f"Removing incomplete tool call block: {tool_call_ids - found_responses}")
                i = j
        else:
            # Regular message — keep it.
            cleaned_messages.append(msg)
            i += 1

    return cleaned_messages
48
+
49
def format_final_answer(report: "ExecutionReport", complexity: "ComplexityLevel") -> str:
    """Format the final answer based on complexity and report content.

    Args:
        report: structured execution report (final_answer, query_summary,
            key_findings, data_sources, limitations).
        complexity: assessment object exposing a ``level`` attribute. (The old
            ``dict`` annotation was wrong — the code uses attribute access.)

    Returns:
        A plain-text answer: short for 'simple' queries, detailed otherwise.
    """
    if complexity.level == 'simple':
        # For simple queries, just return the answer.
        return f"FINAL ANSWER: {report.final_answer}"

    # For complex queries, provide a more detailed response.
    findings = "\n".join(f"• {finding}" for finding in report.key_findings)
    formatted = (
        f"FINAL ANSWER: {report.final_answer}\n\n"
        f"SUMMARY:\n{report.query_summary}\n\n"
        f"KEY FINDINGS:\n{findings}"
    )

    if report.data_sources:
        # Limit to 5 sources.
        sources = "\n".join(f"• {source}" for source in report.data_sources[:5])
        formatted += f"\n\nSOURCES:\n{sources}"

    if report.limitations:
        limitations = "\n".join(f"• {limitation}" for limitation in report.limitations)
        formatted += f"\n\nLIMITATIONS:\n{limitations}"

    return formatted
78
+
79
+
80
def complexity_assessor(state: AgentState) -> AgentState:
    """Assess query complexity and determine if planning is needed."""
    print("=== COMPLEXITY ASSESSMENT ===")

    # Constrain the LLM output to the ComplexityLevel schema.
    structured_llm = llm.with_structured_output(ComplexityLevel)

    prompt = [
        SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
        HumanMessage(content=f"Query: {state['query']}"),
    ]
    verdict = structured_llm.invoke(prompt)

    for label, value in (
        ("Complexity", verdict.level),
        ("Needs planning", verdict.needs_planning),
        ("Reasoning", verdict.reasoning),
    ):
        print(f"{label}: {value}")

    return {
        "complexity_assessment": verdict,
        "messages": state["messages"] + prompt,
    }
src/workflow_test.ipynb ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "d:\\REGNUM_SPECTRARUM_updated\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "from agent import build_workflow\n",
19
+ "from config import config"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 3,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "graph = build_workflow()"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 4,
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "name": "stdout",
38
+ "output_type": "stream",
39
+ "text": [
40
+ "=== USER QUERY TRANSFERED TO AGENT ===\n",
41
+ "=== COMPLEXITY ASSESSMENT ===\n",
42
+ "Complexity: simple\n",
43
+ "Needs planning: False\n",
44
+ "Reasoning: This query is a straightforward arithmetic calculation that can be answered immediately without any tools or complex reasoning.\n",
45
+ "=== SIMPLE EXECUTION ===\n",
46
+ "=== GENERATING EXECUTION REPORT ===\n",
47
+ "Report generated - Confidence: high\n",
48
+ "Key findings: 2\n",
49
+ "Data sources: 0\n",
50
+ "query_summary='The user requested the result of the arithmetic expression 2 + 2 - 2 + 2.' approach_used='The query was evaluated using basic arithmetic operations, following the standard order of operations.' tools_executed=[] key_findings=['The expression was simplified step-by-step: 2 + 2 = 4, then 4 - 2 = 2, and finally 2 + 2 = 4.', 'The final result of the expression is 4.'] data_sources=[] assumptions_made=['The user intended to use standard arithmetic rules without any additional context or modifications.'] confidence_level='high' limitations=['The query was straightforward, and no complex tools or external data sources were required.', 'The execution context did not involve any ambiguity or alternative interpretations.'] final_answer='4'\n",
51
+ "=== ENHANCED ANSWER CRITIQUE ===\n",
52
+ "Quality Score: 8/10\n",
53
+ "Complete: True\n",
54
+ "Accurate: True\n",
55
+ "=== REPLAN DECISION ===\n",
56
+ "Iteration: 1/10\n",
57
+ "Quality score: 8\n",
58
+ "Needs replanning: False\n",
59
+ "Quality acceptable, ending execution\n"
60
+ ]
61
+ }
62
+ ],
63
+ "source": [
64
+ "result = graph.invoke({\"query\" : \"2+2-2+2?\", \"current_step\": 0, \"reasoning_done\": False, \"files\" : [], \"files_contents\" : {}, \"iteration_count\" : 0, \"max_iterations\" : 10, \"plan\" : None} , config = config)"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": null,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": []
73
+ }
74
+ ],
75
+ "metadata": {
76
+ "kernelspec": {
77
+ "display_name": ".venv",
78
+ "language": "python",
79
+ "name": "python3"
80
+ },
81
+ "language_info": {
82
+ "codemirror_mode": {
83
+ "name": "ipython",
84
+ "version": 3
85
+ },
86
+ "file_extension": ".py",
87
+ "mimetype": "text/x-python",
88
+ "name": "python",
89
+ "nbconvert_exporter": "python",
90
+ "pygments_lexer": "ipython3",
91
+ "version": "3.11.5"
92
+ }
93
+ },
94
+ "nbformat": 4,
95
+ "nbformat_minor": 2
96
+ }
test_folder/test.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
def calculate_area(radius):
    """Return the area of a circle with the given radius.

    Raises:
        ValueError: if the radius is negative.
    """
    if radius < 0:
        raise ValueError("Radius cannot be negative")
    return math.pi * radius * radius
8
+
9
# Demo: print the area of a circle with radius 5 (runs on import; no __main__ guard).
radius = 5
area = calculate_area(radius)
print(area)
test_folder/test_.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
test_folder/test_run.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
# === From notebook execution cells ===
# NOTE(review): `system` and `config` are not defined in this file — this is a
# notebook-extracted snippet that assumes both exist in the importing scope.
workflow = system.invoke({"query" : "How many cumulative milliliters of fluid is in all the opaque-capped vials without stickers in the 114 version of the kit that was used for the PromethION long-read sequencing in the paper De Novo-Whole Genome Assembly of the Roborovski Dwarf Hamster (Phodopus roborovskii) Genome?", "current_step": 0, "reasoning_done": False, "files" : [], "files_contents" : {}, "iteration_count" : 0, "max_iterations" : 10, "plan" : None} , config = config)

for message in workflow["messages"]:
    message.pretty_print()

print("\n=== FINAL ANSWER ===")

# Bare expressions below only display output in a notebook; they are no-ops here.
workflow["final_answer"]

workflow
+
test_folder/test_stable.ipynb ADDED
The diff for this file is too large to render. See raw diff