nynuzz committed on
Commit
f4f3e1f
·
verified ·
1 Parent(s): 37f41de

Update web_search_agent.py

Browse files
Files changed (1) hide show
  1. web_search_agent.py +282 -282
web_search_agent.py CHANGED
@@ -1,282 +1,282 @@
1
- import os
2
- from dotenv import load_dotenv
3
- import operator
4
- from typing import List, TypedDict, Annotated, Dict
5
- from pydantic import BaseModel, Field
6
- from IPython.display import Image, display
7
-
8
- from langchain_openai import ChatOpenAI
9
- from langchain_core.messages import SystemMessage, AIMessage, HumanMessage, ToolMessage
10
- from langgraph.graph import MessagesState, StateGraph, END, START
11
- from langgraph.prebuilt import ToolNode, tools_condition
12
-
13
- # Importiamo i web tools
14
- from web_search_tools import google_search_tool, wikipedia_search_tool, browse_web_page_tool, text_analyzer_tool
15
-
16
-
17
- # Carica le variabili d'ambiente
18
- load_dotenv()
19
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
20
- OPENAI_API_MODEL = os.getenv("OPENAI_API_WEB_MODEL")
21
-
22
-
23
- # --- 1. Strutture e Stato ---
24
- class ResearchPlan(BaseModel):
25
- """A step-by-step research plan."""
26
- steps: List[str] = Field(description="A list of concise, sequential steps for the research task.")
27
-
28
- class ResearchState(MessagesState):
29
- task: str
30
- plan: ResearchPlan
31
- current_plan_step: int
32
- context_summary: str
33
- step_results: Annotated[List[str], operator.add] # Memoria a lungo termine per i risultati di ogni passo
34
-
35
-
36
- # --- 2. Tool e Modelli ---
37
- llm = ChatOpenAI(model=OPENAI_API_MODEL, api_key=OPENAI_API_KEY, temperature=0)
38
- llm_with_tools = llm.bind_tools([wikipedia_search_tool, browse_web_page_tool])
39
-
40
-
41
- # --- 3. Nodi del Grafo a Pipeline ---
42
- def planning_node(state: ResearchState):
43
- """Node 1: Generate the initial research plan."""
44
- print("--- 📝 PLANNING NODE ---")
45
-
46
- task = state.get('task')
47
- structured_llm = llm.with_structured_output(ResearchPlan)
48
- planning_prompt = f"""
49
- You are an expert and efficient research planner. Your goal is to create the SHORTEST POSSIBLE, logical, step-by-step plan to solve a user's research task.
50
-
51
- **Core Principles:**
52
- 1. **Analyze Complexity**: First, determine if the task is simple or complex.
53
- - A **simple task** can be solved with a single, well-formulated search and analysis (e.g., "Who won the 1998 World Cup?").
54
- - A **complex task** requires finding one piece of information to unlock the next (e.g., "Who is the manager of the team that won the 1998 World Cup?").
55
- 2. **Create the Plan**:
56
- - For a **simple task**, create a plan with ONLY ONE step: a clear instruction to find the final answer.
57
- - For a **complex task**, break it down into the minimum number of sequential steps required. Each step must build upon the previous one.
58
- 3. **Focus on Actions**: Each step should describe an action to find a specific piece of information.
59
-
60
- ---
61
- **Example 1: Simple Task**
62
- * **User Task:** "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
63
- * **Your Output (Plan):**
64
- "steps": [
65
- "Search Wikipedia for the discography of Mercedes Sosa, find all studio albums released between 2000 and 2009, and count them."
66
- ]
67
-
68
- **Example 2: Complex Task**
69
- * **User Task:** "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name."
70
- * **Your Output (Plan):**
71
- "steps": [
72
- "Find the name of the actor who played Ray in the Polish version of 'Everybody Loves Raymond'.",
73
- "Using the actor's name, find their role in the show 'Magda M.' and extract the character's first name."
74
- ]
75
- ---
76
-
77
- Now, analyze the following user task and generate the most efficient, step-by-step research plan.
78
- **User Task:** {task}
79
- **Your Output (Plan):**
80
- """
81
-
82
- response_plan = structured_llm.invoke([SystemMessage(content=planning_prompt)])
83
- print("--- ✅ PLANNING COMPLETE ---")
84
- print("Generated Plan:", response_plan.steps)
85
- return {"plan": response_plan, "current_plan_step": 0}
86
-
87
-
88
- def search_node(state: ResearchState):
89
- """Node 2: Performs a web search for a single step of the plan."""
90
- step_index = state["current_plan_step"]
91
- plan_steps = state["plan"].steps
92
- current_step_instruction = plan_steps[step_index]
93
- context_summary = state["step_results"]
94
-
95
- print(f"--- 🔎 SEARCH NODE (Executing step: '{current_step_instruction}') ---")
96
- query_prompt = f"""
97
- You are an expert at generating search engine queries.
98
- Your goal is to create a single, concise, and effective Google search query to accomplish the given plan step, using the context from previous steps.
99
-
100
- **Current Plan Step to Execute:** "{current_step_instruction}"
101
- **Context from Previous Steps' Findings:**
102
- ---
103
- {context_summary}
104
- ---
105
-
106
- Based on the **Current Plan Step** and the **Context**, generate the single best possible search query to find the next piece of information.
107
- For example, if the context is "The actor is Bartek Kasprzykowski" and the step is "Find his role in Magda M.", a good query would be "Bartek Kasprzykowski role in Magda M.".
108
- """
109
-
110
- # Genera la query
111
- query = llm.invoke([SystemMessage(content=query_prompt)]).content.strip('"')
112
- print(f"--- Generated Context-Aware Query: '{query}' ---")
113
-
114
- # Eseguiamo il tool di ricerca su Google
115
- search_results = google_search_tool.invoke(query)
116
-
117
- # Aggiorniamo lo stato
118
- return {"messages": [AIMessage(content=search_results)]}
119
-
120
-
121
- def browse_node(state: ResearchState):
122
- """Node 3: Analyzes search results and decides which URL to browse, prioritizing Wikipedia."""
123
- # L'ultimo messaggio contiene i risultati della ricerca Google
124
- search_results = state["messages"][-1].content
125
-
126
- print(f"--- 📖 BROWSE NODE (Analyzing search results) ---")
127
-
128
- # Prompt per scegliere l'URL e il tool corretto
129
- browse_prompt = f"""
130
- You are an expert at selecting the best information source.
131
- Given a list of Google search results, your goal is to choose the SINGLE best URL to browse to accomplish the current research step.
132
-
133
- **Current Research Step:** "{state['plan'].steps[state['current_plan_step']]}"
134
-
135
- **Decision Hierarchy (Strict):**
136
- 1. **Wikipedia First**: If a reliable `wikipedia.org` link is present and seems highly relevant to the current step, you **MUST** choose it and call the `wikipedia_search_tool`.
137
- 2. **Browse Other Sources**: If there are no good Wikipedia links, choose the single most promising URL from another reputable source and call the `browse_web_page_tool`.
138
-
139
- **Search Results:**
140
- ---
141
- {search_results}
142
- ---
143
-
144
- Based on the hierarchy and the current research step, which single tool call should you make?
145
- """
146
-
147
- # Invoca l'LLM per ottenere la decisione sulla chiamata al tool
148
- message = llm_with_tools.invoke([SystemMessage(content=browse_prompt)])
149
-
150
- # Controlla se l'LLM ha effettivamente deciso di chiamare un tool
151
- if not hasattr(message, "tool_calls") or not message.tool_calls:
152
- # Fallback: se l'LLM non riesce a decidere, lo segnaliamo per passare avanti
153
- print("--- ⚠️ BROWSE NODE: LLM failed to choose a tool. Skipping browse step. ---")
154
- return {"messages": [AIMessage(content="No relevant page found to browse.")]}
155
-
156
- print(f"--- Browse Node decision: Call '{message.tool_calls[0]['name']}' on '{message.tool_calls[0]['args']}' ---")
157
- return {"messages": message}
158
-
159
-
160
- def step_synthesis_node(state: ResearchState):
161
- """Node 4: Summarize the information from the current step and prepare for the next one."""
162
- print(" --- 🔄 STEP SYNTHESIS NODE ---")
163
-
164
- current_step_instruction = state["plan"].steps[state["current_plan_step"]]
165
- browsed_content = state["messages"][-1].content
166
-
167
- summary_prompt = f"""
168
- You are a factual extractor and research analyst.
169
- Your goal is to extract key pieces of information from the provided content to satisfy a specific sub-task and prepare for the next step.
170
-
171
- **Sub-Task (Instruction to accomplish):** "{current_step_instruction}"
172
-
173
- **Content Gathered in this Step:**
174
- ---
175
- {browsed_content}
176
- ---
177
-
178
- **Analysis:**
179
- 1. **Extract Key Facts**: From the "Content Gathered", pull out the specific names, dates, numbers, or links that directly answer the "Sub-Task".
180
- 2. **Assess Step Completion**: Was the sub-task successfully completed with this information?
181
- 3. **Synthesize for Next Step**: Create a very concise summary of your findings. This summary will be used as context for the next step in the plan. If the sub-task was not completed, state what is still missing.
182
-
183
- **Your Output:**
184
- Provide a concise summary of your findings. For example:
185
- "Successfully found the actor's name: Bartek Kasprzykowski."
186
- or
187
- "Failed to find the specific NASA award number on this page, but confirmed the paper was written by the correct team."
188
- """
189
-
190
- step_summary = llm.invoke([SystemMessage(content=summary_prompt)]).content
191
- print(f"--- ✅ STEP {state['current_plan_step'] + 1} COMPLETE. Summary: '{step_summary}' ---")
192
-
193
- # Aggiunge il riassunto ai risultati a lungo termine e avanza il contatore
194
- return {"step_results": [step_summary], "current_plan_step": state["current_plan_step"] + 1}
195
-
196
-
197
- def final_synthesis_node(state: ResearchState):
198
- """Node 5: Takes all the summarized results from each step and combines them into a complete and final answer for the original task."""
199
- print("--- ✍️ FINAL SYNTHESIS NODE ---")
200
-
201
- # Raccoglie i riassunti di ogni passo dalla memoria a lungo termine dello stato
202
- step_summaries = state.get("step_results", [])
203
-
204
- # Controlla se abbiamo effettivamente dei risultati da sintetizzare
205
- if not step_summaries:
206
- final_report = "The research process concluded, but no conclusive information was gathered to answer the task."
207
- return {"messages": [AIMessage(content=final_report)]}
208
-
209
- # Crea un contesto pulito per l'LLM finale
210
- full_context = "\n\n".join(
211
- [f"Finding from Step {i+1}: {summary}" for i, summary in enumerate(step_summaries)]
212
- )
213
-
214
- # Prompt per la sintesi finale
215
- final_prompt = f"""
216
- You are an expert data analyst and report writer.
217
- Your final and most important task is to synthesize the provided research findings to answer the user's original task with extreme precision.
218
-
219
- **User's Original Task:**
220
- ---
221
- "{state['task']}"
222
- ---
223
-
224
- **Summary of Findings from Each Research Step:**
225
- ---
226
- {full_context}
227
- ---
228
-
229
- **Your Analytical Process (You MUST follow this):**
230
- 1. **Re-read the Original Task**: Pay extremely close attention to all constraints, especially dates, numbers, and specific conditions (e.g., "between 2000 and 2009, included", "first name only").
231
- 2. **Verify Information**: Scan the "Summary of Findings" and ensure you have all the necessary pieces to construct the answer. Do not invent or infer information that is not present.
232
- 3. **Construct the Final Answer**: Write a clear, direct, and accurate answer based solely on the verified findings. Address every part of the user's original task.
233
-
234
- Based on this rigorous process, generate the final answer.
235
- """
236
-
237
- # Usa un LLM (può essere lo stesso o uno diverso) per generare il report finale
238
- final_report = llm.invoke([SystemMessage(content=final_prompt)])
239
- print("--- ✅ FINAL REPORT GENERATED ---")
240
-
241
- # Aggiunge il report finale ai messaggi, che sarà l'output finale del grafo
242
- return {"messages": final_report}
243
-
244
-
245
- # --- 4. Costruzione del Grafo a Pipeline ---
246
- def router(state: ResearchState):
247
- """Decides whether to proceed to the next step or move on to the final summary."""
248
- print("--- 🔍 ROUTER ---")
249
- if state["current_plan_step"] < len(state["plan"].steps):
250
- print(" - Decision: Continue to next pipeline cycle.")
251
- return "continue_pipeline"
252
- else:
253
- print(" - Decision: Plan complete. Proceed to final synthesis.")
254
- return "end_pipeline"
255
-
256
-
257
- builder = StateGraph(ResearchState)
258
- builder.add_node("planning", planning_node)
259
- builder.add_node("search", search_node)
260
- builder.add_node("browse", browse_node)
261
- builder.add_node("tools", ToolNode([wikipedia_search_tool, browse_web_page_tool]))
262
- builder.add_node("synthesis", step_synthesis_node)
263
- builder.add_node("final_synthesizer", final_synthesis_node)
264
-
265
- builder.add_edge(START, "planning")
266
- builder.add_edge("planning", "search")
267
- builder.add_edge("search", "browse")
268
- builder.add_edge("browse", "tools")
269
- builder.add_edge("tools", "synthesis")
270
- # Dopo la sintesi di un passo, il router decide se ricominciare o finire
271
- builder.add_conditional_edges(
272
- "synthesis",
273
- router,
274
- {
275
- "continue_pipeline": "search",
276
- "end_pipeline": "final_synthesizer"
277
- }
278
- )
279
- builder.add_edge("final_synthesizer", END)
280
-
281
- web_search_graph = builder.compile()
282
- display(Image(web_search_graph.get_graph(xray=1).draw_mermaid_png(output_file_path="./web_search_graph.png")))
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ import operator
4
+ from typing import List, TypedDict, Annotated, Dict
5
+ from pydantic import BaseModel, Field
6
+ #from IPython.display import Image, display
7
+
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain_core.messages import SystemMessage, AIMessage, HumanMessage, ToolMessage
10
+ from langgraph.graph import MessagesState, StateGraph, END, START
11
+ from langgraph.prebuilt import ToolNode, tools_condition
12
+
13
+ # Importiamo i web tools
14
+ from web_search_tools import google_search_tool, wikipedia_search_tool, browse_web_page_tool, text_analyzer_tool
15
+
16
+
17
+ # Carica le variabili d'ambiente
18
# Load a local .env file (if present) before reading the settings below.
load_dotenv()

# API key and model name used by the web-search agent. Either value is None
# when the corresponding environment variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_MODEL = os.environ.get("OPENAI_API_WEB_MODEL")
21
+
22
+
23
+ # --- 1. Strutture e Stato ---
24
class ResearchPlan(BaseModel):
    """A step-by-step research plan."""

    # NOTE: this model is handed to `llm.with_structured_output`, so the class
    # docstring and the field description below are presumably part of the
    # schema shown to the LLM. Treat both as runtime text, not as comments.
    steps: List[str] = Field(
        description="A list of concise, sequential steps for the research task.",
    )
27
+
28
class ResearchState(MessagesState):
    """Shared state passed between the nodes of the research pipeline.

    Extends ``MessagesState``, so it also carries the ``messages`` history
    that the search, browse and tool nodes append to.
    """

    # The user's original research question.
    task: str
    # Plan produced by `planning_node`.
    plan: ResearchPlan
    # Zero-based index into `plan.steps` of the step currently being executed.
    current_plan_step: int
    # Declared but not read or written by any node visible in this file.
    context_summary: str
    # Long-term memory: one summary per completed step. The `operator.add`
    # reducer concatenates the lists returned by nodes instead of replacing them.
    step_results: Annotated[List[str], operator.add]
34
+
35
+
36
+ # --- 2. Tool e Modelli ---
37
# Deterministic (temperature 0) chat model shared by every node.
llm = ChatOpenAI(
    model=OPENAI_API_MODEL,
    api_key=OPENAI_API_KEY,
    temperature=0,
)

# Same model, but able to emit calls to the two page-reading tools. Used by
# the browse node to pick which URL to open.
llm_with_tools = llm.bind_tools(
    [wikipedia_search_tool, browse_web_page_tool],
)
39
+
40
+
41
+ # --- 3. Nodi del Grafo a Pipeline ---
42
def planning_node(state: ResearchState):
    """Node 1: Generate the initial research plan.

    Asks the LLM, with structured output bound to ``ResearchPlan``, for the
    shortest sequence of steps that solves the task.

    Args:
        state: Current graph state; only ``task`` is read.

    Returns:
        A partial state update holding the generated ``plan`` and
        ``current_plan_step`` set to 0.

    Raises:
        ValueError: If ``task`` is missing or blank. Without a task the rest
            of the pipeline cannot produce a meaningful answer, so fail fast
            instead of planning for the literal text "None".
    """
    print("--- 📝 PLANNING NODE ---")

    task = state.get('task')
    if not task or not task.strip():
        raise ValueError("ResearchState['task'] must be a non-empty string.")

    structured_llm = llm.with_structured_output(ResearchPlan)
    planning_prompt = f"""
    You are an expert and efficient research planner. Your goal is to create the SHORTEST POSSIBLE, logical, step-by-step plan to solve a user's research task.

    **Core Principles:**
    1. **Analyze Complexity**: First, determine if the task is simple or complex.
        - A **simple task** can be solved with a single, well-formulated search and analysis (e.g., "Who won the 1998 World Cup?").
        - A **complex task** requires finding one piece of information to unlock the next (e.g., "Who is the manager of the team that won the 1998 World Cup?").
    2. **Create the Plan**:
        - For a **simple task**, create a plan with ONLY ONE step: a clear instruction to find the final answer.
        - For a **complex task**, break it down into the minimum number of sequential steps required. Each step must build upon the previous one.
    3. **Focus on Actions**: Each step should describe an action to find a specific piece of information.

    ---
    **Example 1: Simple Task**
    * **User Task:** "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
    * **Your Output (Plan):**
        "steps": [
            "Search Wikipedia for the discography of Mercedes Sosa, find all studio albums released between 2000 and 2009, and count them."
        ]

    **Example 2: Complex Task**
    * **User Task:** "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name."
    * **Your Output (Plan):**
        "steps": [
            "Find the name of the actor who played Ray in the Polish version of 'Everybody Loves Raymond'.",
            "Using the actor's name, find their role in the show 'Magda M.' and extract the character's first name."
        ]
    ---

    Now, analyze the following user task and generate the most efficient, step-by-step research plan.
    **User Task:** {task}
    **Your Output (Plan):**
    """

    response_plan = structured_llm.invoke([SystemMessage(content=planning_prompt)])

    # The search node indexes plan.steps[0] unconditionally, so an empty plan
    # (or one made only of blank strings) would crash the graph with an
    # IndexError. Drop blank steps and, if nothing usable remains, fall back
    # to treating the task itself as the single step.
    usable_steps = [step for step in response_plan.steps if step and step.strip()]
    if not usable_steps:
        print("--- ⚠️ PLANNING NODE: empty plan returned. Falling back to a single-step plan. ---")
        usable_steps = [task]
    if usable_steps != response_plan.steps:
        response_plan = ResearchPlan(steps=usable_steps)

    print("--- ✅ PLANNING COMPLETE ---")
    print("Generated Plan:", response_plan.steps)
    return {"plan": response_plan, "current_plan_step": 0}
86
+
87
+
88
def search_node(state: ResearchState):
    """Node 2: Performs a web search for a single step of the plan.

    Builds a context-aware query for the current plan step with the LLM, runs
    it through ``google_search_tool`` and stores the raw results as the latest
    message so the browse node can pick a URL from them.

    Args:
        state: Current graph state; reads ``current_plan_step``, ``plan`` and
            ``step_results``.

    Returns:
        A partial state update appending one ``AIMessage`` whose content is
        the search tool's output.
    """
    step_index = state["current_plan_step"]
    current_step_instruction = state["plan"].steps[step_index]

    # Render earlier findings as numbered lines (same format the final
    # synthesis node uses) rather than interpolating the Python list repr,
    # which shows up in the prompt as "[]" or "['...', '...']".
    previous_findings = state.get("step_results") or []
    if previous_findings:
        context_summary = "\n".join(
            f"Finding from Step {number}: {finding}"
            for number, finding in enumerate(previous_findings, start=1)
        )
    else:
        context_summary = "No findings yet; this is the first step."

    print(f"--- 🔎 SEARCH NODE (Executing step: '{current_step_instruction}') ---")
    query_prompt = f"""
    You are an expert at generating search engine queries.
    Your goal is to create a single, concise, and effective Google search query to accomplish the given plan step, using the context from previous steps.

    **Current Plan Step to Execute:** "{current_step_instruction}"
    **Context from Previous Steps' Findings:**
    ---
    {context_summary}
    ---

    Based on the **Current Plan Step** and the **Context**, generate the single best possible search query to find the next piece of information.
    For example, if the context is "The actor is Bartek Kasprzykowski" and the step is "Find his role in Magda M.", a good query would be "Bartek Kasprzykowski role in Magda M.".
    """

    # Generate the query. Trim whitespace before removing the wrapping quotes,
    # otherwise a reply such as '"some query"\n' keeps its quotes.
    raw_query = llm.invoke([SystemMessage(content=query_prompt)]).content
    query = raw_query.strip().strip('"').strip()
    if not query:
        # Never send an empty query to the search tool; the step instruction
        # itself is a usable query.
        query = current_step_instruction
    print(f"--- Generated Context-Aware Query: '{query}' ---")

    # Run the Google search tool.
    search_results = google_search_tool.invoke(query)

    # Publish the results as the newest message for the browse node.
    return {"messages": [AIMessage(content=search_results)]}
119
+
120
+
121
def browse_node(state: ResearchState):
    """Node 3: Analyzes search results and decides which URL to browse, prioritizing Wikipedia.

    Reads the newest message (the Google search results) and asks the
    tool-enabled LLM to choose one tool call for the current plan step.

    Returns:
        A partial state update whose ``messages`` entry is either the LLM's
        tool-calling message, or a plain ``AIMessage`` notice when the LLM
        made no tool call.
    """
    # The newest message holds the Google search results.
    latest_message = state["messages"][-1]
    search_results = latest_message.content

    print("--- 📖 BROWSE NODE (Analyzing search results) ---")

    # Prompt asking the model to pick the URL and the matching tool.
    active_step = state['plan'].steps[state['current_plan_step']]
    browse_prompt = f"""
    You are an expert at selecting the best information source.
    Given a list of Google search results, your goal is to choose the SINGLE best URL to browse to accomplish the current research step.

    **Current Research Step:** "{active_step}"

    **Decision Hierarchy (Strict):**
    1. **Wikipedia First**: If a reliable `wikipedia.org` link is present and seems highly relevant to the current step, you **MUST** choose it and call the `wikipedia_search_tool`.
    2. **Browse Other Sources**: If there are no good Wikipedia links, choose the single most promising URL from another reputable source and call the `browse_web_page_tool`.

    **Search Results:**
    ---
    {search_results}
    ---

    Based on the hierarchy and the current research step, which single tool call should you make?
    """

    # Ask the tool-enabled model for its decision.
    decision = llm_with_tools.invoke([SystemMessage(content=browse_prompt)])

    # Happy path: the model requested at least one tool call.
    requested_calls = getattr(decision, "tool_calls", None)
    if requested_calls:
        first_call = requested_calls[0]
        print(f"--- Browse Node decision: Call '{first_call['name']}' on '{first_call['args']}' ---")
        return {"messages": decision}

    # Fallback: the model could not decide, so record that and move on.
    print("--- ⚠️ BROWSE NODE: LLM failed to choose a tool. Skipping browse step. ---")
    return {"messages": [AIMessage(content="No relevant page found to browse.")]}
158
+
159
+
160
def step_synthesis_node(state: ResearchState):
    """Node 4: Summarize the information from the current step and prepare for the next one.

    Condenses the content of the newest message into a short summary for the
    current plan step.

    Returns:
        A partial state update that appends the summary to ``step_results``
        and advances ``current_plan_step`` by one.
    """
    print(" --- 🔄 STEP SYNTHESIS NODE ---")

    step_index = state["current_plan_step"]
    sub_task = state["plan"].steps[step_index]
    gathered_content = state["messages"][-1].content

    summary_prompt = f"""
    You are a factual extractor and research analyst.
    Your goal is to extract key pieces of information from the provided content to satisfy a specific sub-task and prepare for the next step.

    **Sub-Task (Instruction to accomplish):** "{sub_task}"

    **Content Gathered in this Step:**
    ---
    {gathered_content}
    ---

    **Analysis:**
    1. **Extract Key Facts**: From the "Content Gathered", pull out the specific names, dates, numbers, or links that directly answer the "Sub-Task".
    2. **Assess Step Completion**: Was the sub-task successfully completed with this information?
    3. **Synthesize for Next Step**: Create a very concise summary of your findings. This summary will be used as context for the next step in the plan. If the sub-task was not completed, state what is still missing.

    **Your Output:**
    Provide a concise summary of your findings. For example:
    "Successfully found the actor's name: Bartek Kasprzykowski."
    or
    "Failed to find the specific NASA award number on this page, but confirmed the paper was written by the correct team."
    """

    reply = llm.invoke([SystemMessage(content=summary_prompt)])
    step_summary = reply.content
    next_step = step_index + 1
    print(f"--- ✅ STEP {next_step} COMPLETE. Summary: '{step_summary}' ---")

    # Store the summary in long-term results and advance the step counter.
    return {"step_results": [step_summary], "current_plan_step": next_step}
195
+
196
+
197
def final_synthesis_node(state: ResearchState):
    """Node 5: Takes all the summarized results from each step and combines them into a complete and final answer for the original task.

    Returns:
        A partial state update whose ``messages`` entry is the final answer:
        the LLM's reply, or a fixed notice when no step produced a summary.
    """
    print("--- ✍️ FINAL SYNTHESIS NODE ---")

    # Per-step summaries accumulated in the state's long-term memory.
    step_summaries = state.get("step_results", [])

    # Nothing to synthesize: report that and stop without calling the LLM.
    if not step_summaries:
        return {
            "messages": [
                AIMessage(
                    content="The research process concluded, but no conclusive information was gathered to answer the task."
                )
            ]
        }

    # Build a clean, numbered context for the final LLM call.
    numbered_findings = []
    for position, summary in enumerate(step_summaries, start=1):
        numbered_findings.append(f"Finding from Step {position}: {summary}")
    full_context = "\n\n".join(numbered_findings)

    original_task = state['task']

    # Prompt for the final synthesis.
    final_prompt = f"""
    You are an expert data analyst and report writer.
    Your final and most important task is to synthesize the provided research findings to answer the user's original task with extreme precision.

    **User's Original Task:**
    ---
    "{original_task}"
    ---

    **Summary of Findings from Each Research Step:**
    ---
    {full_context}
    ---

    **Your Analytical Process (You MUST follow this):**
    1. **Re-read the Original Task**: Pay extremely close attention to all constraints, especially dates, numbers, and specific conditions (e.g., "between 2000 and 2009, included", "first name only").
    2. **Verify Information**: Scan the "Summary of Findings" and ensure you have all the necessary pieces to construct the answer. Do not invent or infer information that is not present.
    3. **Construct the Final Answer**: Write a clear, direct, and accurate answer based solely on the verified findings. Address every part of the user's original task.

    Based on this rigorous process, generate the final answer.
    """

    # The same LLM is used here; a different one could be swapped in.
    final_report = llm.invoke([SystemMessage(content=final_prompt)])
    print("--- ✅ FINAL REPORT GENERATED ---")

    # The report is appended to the messages and is the graph's final output.
    return {"messages": final_report}
243
+
244
+
245
+ # --- 4. Costruzione del Grafo a Pipeline ---
246
def router(state: ResearchState):
    """Decides whether to proceed to the next step or move on to the final summary.

    Returns:
        ``"continue_pipeline"`` while ``current_plan_step`` is still a valid
        index into ``plan.steps``; ``"end_pipeline"`` otherwise.
    """
    print("--- 🔍 ROUTER ---")

    next_index = state["current_plan_step"]
    total_steps = len(state["plan"].steps)

    # Guard clause: every planned step has been executed.
    if next_index >= total_steps:
        print(" - Decision: Plan complete. Proceed to final synthesis.")
        return "end_pipeline"

    print(" - Decision: Continue to next pipeline cycle.")
    return "continue_pipeline"
255
+
256
+
257
builder = StateGraph(ResearchState)

# Register the pipeline nodes in execution order.
for _node_name, _node_action in (
    ("planning", planning_node),
    ("search", search_node),
    ("browse", browse_node),
    ("tools", ToolNode([wikipedia_search_tool, browse_web_page_tool])),
    ("synthesis", step_synthesis_node),
    ("final_synthesizer", final_synthesis_node),
):
    builder.add_node(_node_name, _node_action)

# Fixed, linear part of the pipeline: plan once, then search -> browse ->
# tools -> synthesis for each step.
for _source, _target in (
    (START, "planning"),
    ("planning", "search"),
    ("search", "browse"),
    ("browse", "tools"),
    ("tools", "synthesis"),
):
    builder.add_edge(_source, _target)

# After each step's synthesis the router decides whether to start another
# search cycle or to finish.
builder.add_conditional_edges(
    "synthesis",
    router,
    {
        "continue_pipeline": "search",
        "end_pipeline": "final_synthesizer",
    },
)
builder.add_edge("final_synthesizer", END)

web_search_graph = builder.compile()
# Notebook-only rendering of the compiled graph, left disabled:
#display(Image(web_search_graph.get_graph(xray=1).draw_mermaid_png(output_file_path="./web_search_graph.png")))