gabejavitt committed on
Commit
d7eb8af
·
verified ·
1 Parent(s): 99b68de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -35
app.py CHANGED
@@ -10,6 +10,7 @@ import torch
10
  import json # For robust tool call parsing/generation if needed
11
  import re # For finding JSON
12
  import uuid # For generating tool call IDs
 
13
 
14
  # --- Multimodal & Web Tool Imports ---
15
  from transformers import pipeline
@@ -68,30 +69,30 @@ def search_tool(query: str) -> str:
68
 
69
  @tool
70
  def code_interpreter(code: str) -> str:
71
- """
72
- Executes a string of Python code and returns its stdout, stderr, and any error.
73
- ...
74
- """
75
  print(f"--- Calling Code Interpreter with code:\n{code}\n---")
76
  output_stream = io.StringIO()
77
  error_stream = io.StringIO()
78
  try:
79
  with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
80
- # --- FIX IS HERE ---
81
- # Create a safe environment that includes 'pd' and standard Python built-ins
82
  safe_globals = {
83
  "pd": pd,
84
- "__builtins__": __builtins__ # This is the crucial addition
85
  }
86
- # Execute the code within this safe environment
87
  exec(code, safe_globals, {})
88
- # --- END FIX ---
89
 
90
  stdout = output_stream.getvalue(); stderr = error_stream.getvalue()
91
  if stderr: return f"Error: {stderr}\nStdout: {stdout}"
92
  if stdout: return f"Success:\n{stdout}"
93
  return "Success: Code executed without error and produced no stdout."
94
- except Exception as e: return f"Execution failed with error: {str(e)}"
 
 
 
 
 
 
 
95
 
96
  @tool
97
  def read_file(path: str) -> str:
@@ -243,7 +244,7 @@ class AgentState(TypedDict):
243
  def should_continue(state: AgentState):
244
  """
245
  Custom logic to decide whether to continue or end.
246
- Now allows for a "reasoning loop".
247
  """
248
  last_message = state['messages'][-1]
249
 
@@ -258,15 +259,11 @@ def should_continue(state: AgentState):
258
  print("--- Condition: Saw other tools, calling tools node. ---")
259
  return "tools"
260
 
261
- # --- THIS IS THE NEW LOGIC ---
262
- # If the last message is from the AI and has NO tool calls (i.e., it's a "thought"),
263
  # loop back to the agent node to let it "think" again.
264
- print("--- Condition: No tool call detected. Looping back to agent. ---")
265
  return "agent"
266
-
267
- # The old "END" path is removed. The only way to END
268
- # is to explicitly call final_answer_tool.
269
-
270
 
271
  # --- Basic Agent Definition ---
272
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
@@ -290,22 +287,46 @@ class BasicAgent:
290
  ])
291
 
292
  # ==================== MODIFIED SYSTEM PROMPT ====================
293
- self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
294
- Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question.
295
-
296
- **CRITICAL INSTRUCTIONS:**
297
- * **DO NOT** provide the final answer as plain text.
298
- * **THE ONLY WAY** to provide a final answer is by calling the `final_answer_tool`.
299
- * **DO NOT** include conversational filler (e.g., "The answer is...").
300
- * **DO NOT** explain your reasoning unless it's inside a `code_interpreter` comment.
301
- * **DO NOT** mix plain text and tool-call JSON in the same response.
302
- * **DO NOT** use XML formats like `<function=...>` or `<code_interpreter>`. **THIS WILL FAIL.**
303
-
304
- You have access to the following tools:
 
 
 
 
 
 
 
 
 
305
  {tool_descriptions}
 
 
 
 
 
 
 
 
 
306
 
307
- **TOOL USAGE PROTOCOL:**
308
- * To call a tool, respond ONLY with a single JSON object formatted exactly like this:
 
 
 
 
 
 
309
  ```json
310
  {{
311
  "tool": "tool_name",
@@ -354,10 +375,18 @@ You have access to the following tools:
354
  print("Building agent graph...")
355
  graph_builder = StateGraph(AgentState)
356
  graph_builder.add_node("agent", agent_node)
357
- graph_builder.add_node("tools", tool_node)
358
  graph_builder.add_edge(START, "agent")
359
- graph_builder.add_conditional_edges("agent", should_continue, {"tools": "tools", END: END})
360
- graph_builder.add_edge("tools", "agent")
 
 
 
 
 
 
 
 
361
  self.graph = graph_builder.compile()
362
  print("✅ Graph compiled.")
363
 
 
10
  import json # For robust tool call parsing/generation if needed
11
  import re # For finding JSON
12
  import uuid # For generating tool call IDs
13
+ import traceback
14
 
15
  # --- Multimodal & Web Tool Imports ---
16
  from transformers import pipeline
 
69
 
70
  @tool
71
  def code_interpreter(code: str) -> str:
72
+ """Executes Python code..."""
 
 
 
73
  print(f"--- Calling Code Interpreter with code:\n{code}\n---")
74
  output_stream = io.StringIO()
75
  error_stream = io.StringIO()
76
  try:
77
  with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
 
 
78
  safe_globals = {
79
  "pd": pd,
80
+ "__builtins__": __builtins__
81
  }
 
82
  exec(code, safe_globals, {})
 
83
 
84
  stdout = output_stream.getvalue(); stderr = error_stream.getvalue()
85
  if stderr: return f"Error: {stderr}\nStdout: {stdout}"
86
  if stdout: return f"Success:\n{stdout}"
87
  return "Success: Code executed without error and produced no stdout."
88
+
89
+ except Exception as e:
90
+ # --- THIS IS THE IMPROVEMENT ---
91
+ # Get the full traceback string
92
+ tb_str = traceback.format_exc()
93
+ print(f"--- Code Interpreter FAILED ---\n{tb_str}\n---")
94
+ return f"Execution failed with error:\n{tb_str}"
95
+ # --- END IMPROVEMENT ---
96
 
97
  @tool
98
  def read_file(path: str) -> str:
 
244
  def should_continue(state: AgentState):
245
  """
246
  Custom logic to decide whether to continue or end.
247
+ This now allows for a "reasoning loop".
248
  """
249
  last_message = state['messages'][-1]
250
 
 
259
  print("--- Condition: Saw other tools, calling tools node. ---")
260
  return "tools"
261
 
262
+ # --- THIS IS THE KEY CHANGE ---
263
+ # If the last message is from the AI and has NO tool calls (i.e., it's plain text),
264
  # loop back to the agent node to let it "think" again.
265
+ print("--- Condition: No tool call. Looping back to agent (reasoning loop). ---")
266
  return "agent"
 
 
 
 
267
 
268
  # --- Basic Agent Definition ---
269
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
287
  ])
288
 
289
  # ==================== MODIFIED SYSTEM PROMPT ====================
290
+ self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant for the GAIA benchmark.
291
+ Your goal is to provide the concise, factual answer by strictly following a step-by-step reasoning process.
292
+
293
+ **CRITICAL PROTOCOL: YOU MUST FOLLOW THIS PROCESS**
294
+
295
+ 1. **ANALYZE:** Read the question and all messages in the history.
296
+ 2. **PLAN:** Your *first* response on any new task MUST be a step-by-step plan as plain text. Do NOT call a tool on your first turn. Write down your logic, what you need to find, and which tool you *plan* to use.
297
+ 3. **EXECUTE:** After you submit your plan, you will run again. Now, execute the *first* step of your plan by calling the *one* appropriate tool.
298
+ 4. **ANALYZE TOOL OUTPUT:** You will receive a [Tool Output] message. You MUST read it.
299
+ 5. **REPEAT or FINISH:**
300
+ * **If more steps are needed:** Go back to step 2 (PLAN). Write an *updated* plan as plain text (e.g., "Step 1 was successful. My new step 2 is...").
301
+ * **If the [Tool Output] contains the final answer:** You MUST call the `final_answer_tool`. Your answer *must* be derived *only* from the [Tool Output], not your own knowledge.
302
+
303
+ **RULES:**
304
+ * **NEVER** call a tool on the same turn you write a plan.
305
+ * **NEVER** use your pre-trained "leaked" knowledge for the final answer. The answer *must* come from a [Tool Output] (e.g., from `code_interpreter`'s print() or `search_tool`).
306
+ * **NEVER** answer a logic puzzle from memory. You *must* use `code_interpreter`, **print the result**, and then use that printed result for your final answer.
307
+ * **NEVER** call `final_answer_tool` until a tool has given you the answer.
308
+ * **Error Handling:** If a tool call fails, your next step (Step 2) must be to write a plan that analyzes the error and tries a *different* approach.
309
+
310
+ **TOOLS:**
311
  {tool_descriptions}
312
+ You have access to the following tools:
313
+ - {tool.name}: ...
314
+ - code_interpreter: Executes Python code.
315
+ **CODE INTERPRETER RULES:**
316
+ 1. **ALWAYS** use a `print()` statement to output your final result. The tool only returns what you print.
317
+ 2. **NEVER** write a complex, multi-step script in one go.
318
+ 3. **ALWAYS** break the problem down. Call the tool with a *simple* script to get one piece of information (e.g., `print(df.head())`).
319
+ 4. Then, use that output (in your "think" step) to plan your *next* simple script (e.g., `print(df['column'].value_counts())`).
320
+ 5. **ALWAYS** write your logical plan as Python comments (`#`) inside the code block *before* you write the code itself.
321
 
322
+ **REASONING PROCESS & STOPPING CONDITION:**
323
+ 1. **PLAN:** First, respond with your step-by-step plan in plain text. Do not call a tool yet.
324
+ 2. **(Graph will loop)**
325
+ 3. **EXECUTE:** Now, call the *one* tool needed for the first step of your plan.
326
+ 4. **ANALYZE:** You will get a [Tool Output].
327
+ 5. **REPEAT:** Go back to step 1. Write an updated plan (e.g., "Step 1 was successful and gave me [data]. My step 2 is...").
328
+ 6. **STOP:** Only call `final_answer_tool` when a [Tool Output] has given you the final, exact answer.
329
+ **TOOL FORMAT (JSON ONLY):**
330
  ```json
331
  {{
332
  "tool": "tool_name",
 
375
  print("Building agent graph...")
376
  graph_builder = StateGraph(AgentState)
377
  graph_builder.add_node("agent", agent_node)
378
+ graph_builder.add_node("tools", tool_node)
379
  graph_builder.add_edge(START, "agent")
380
+ graph_builder.add_edge("tools", "agent") # This edge is correct
381
+ # --- REPLACE your old add_conditional_edges ---
382
+ graph_builder.add_conditional_edges(
383
+ "agent",
384
+ should_continue,
385
+ {
386
+ "tools": "tools", # If tools are called
387
+ "agent": "agent", # If text is generated (the new loop)
388
+ END: END # If final_answer is called
389
+ }
390
  self.graph = graph_builder.compile()
391
  print("✅ Graph compiled.")
392