gabejavitt committed on
Commit
87f9e05
·
verified ·
1 Parent(s): 821692a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -81
app.py CHANGED
@@ -334,90 +334,91 @@ def should_continue(state: AgentState):
334
  # --- Basic Agent Definition ---
335
  class BasicAgent:
336
  def __init__(self):
337
- print("BasicAgent (LangGraph) initializing...")
338
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
339
- if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
340
-
341
- self.tools = defined_tools
342
-
343
- # Build tool descriptions separately to avoid f-string backslash issues
344
- tool_desc_list = []
345
- for tool in self.tools:
346
- if tool.name == 'code_interpreter':
347
- desc = (f"- {tool.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles.\n"
348
- f" **CODE INTERPRETER RULES:**\n"
349
- f" 1. ALWAYS use `print()` for final results.\n"
350
- f" 2. Write SIMPLE, single-step scripts.\n"
351
- f" 3. PLAN your next script using plain text output first.\n"
352
- f" 4. Write reasoning as Python comments (#) before code.\n"
353
- f" 'pandas' (as pd) is available.")
354
- else:
355
- desc = f"- {tool.name}: {tool.description}"
356
- tool_desc_list.append(desc)
357
-
358
- tool_descriptions = "\n".join(tool_desc_list)
359
-
360
- # ==================== SYSTEM PROMPT V4 ====================
361
- self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant for the GAIA benchmark.
362
- Your goal is to provide the concise, factual answer by strictly following a step-by-step reasoning process.
363
-
364
- **CRITICAL PROTOCOL: YOU MUST FOLLOW THIS PROCESS**
365
-
366
- 1. **ANALYZE:** Read the question and all messages in the history.
367
- 2. **MANDATORY FIRST STEP:** Your *first* response on *any* new task MUST be a plan in plain text. Do NOT call any tool on your first turn. Write down your logic, what you need, and which tool you *plan* to use next. Failure to provide a plan first will result in incorrect behavior.
368
- 3. **EXECUTE:** After submitting your plan, you will run again. Now, execute the *next* step of your plan by calling the *one* appropriate tool using the correct JSON format.
369
- 4. **ANALYZE TOOL OUTPUT:** You will receive a ToolMessage with the output. You MUST read it carefully.
370
- 5. **REPEAT or FINISH:**
371
- * **If more steps are needed:** Go back to step 1 (ANALYZE the new info & PLAN). Write an *updated* plan as plain text (e.g., "The search found X. My next step is to use code_interpreter to process X...").
372
- * **If the ToolMessage contains the final answer:** You MUST call the `final_answer_tool`. Your answer *must* be derived *only* from the ToolMessage output, not your own knowledge.
373
-
374
- **RULES:**
375
- * **NEVER** call a tool on the same turn you write a plan (plain text).
376
- * **NEVER** use your pre-trained "leaked" knowledge for the final answer. The answer *must* come from a ToolMessage (e.g., from `code_interpreter`'s print() or `search_tool`).
377
- * **NEVER** answer a logic puzzle from memory. You *must* use `code_interpreter`, ensure it `print()`s the result, analyze that output, and then use that printed result for `final_answer_tool`.
378
- * **NEVER** call `final_answer_tool` until a tool has explicitly given you the answer in its output.
379
- * **Error Handling:** If a tool call returns an Error, your next step (Step 1 PLAN) MUST analyze the error message and propose a *different* approach (different tool, different arguments, different logic). Do not retry the exact same failed call.
380
-
381
- **TOOLS:**
382
- {tool_descriptions}
383
 
384
- **TOOL FORMAT (JSON ONLY):**
385
- Respond ONLY with a single JSON block like this when calling a tool:
386
- ```json
387
- {{
388
- "tool": "tool_name",
389
- "tool_input": {{ "arg_name1": "value1", ... }}
390
- }}
391
- ```
392
- * Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
393
- * Do not add any text before or after the JSON block.
394
 
395
- Example for final_answer_tool:
396
- ```json
397
- {{
398
- "tool": "final_answer_tool",
399
- "tool_input": {{
400
- "answer": "The final answer string here"
401
- }}
402
- }}
403
- ```
404
- NOTE: The value for "answer" MUST be a string enclosed in double quotes.
405
- """
406
 
407
- print("Initializing Groq LLM Endpoint...")
408
- try:
409
- chat_llm = ChatGroq(
410
- temperature=0.01, # Low temperature for factual tasks
411
- groq_api_key=GROQ_API_KEY,
412
- model_name="openai/gpt-oss-120b" # <-- Switched Model
413
- )
414
- print("✅ Groq LLM Endpoint initialized with openai/gpt-oss-120b.")
415
- except Exception as e:
416
- print(f"Error initializing Groq: {e}")
417
- raise
418
-
419
- self.llm_with_tools = chat_llm.bind_tools(self.tools)
420
- print("✅ Tools bound to LLM (using bind_tools).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
  # --- Agent Node with Robust Parsing Fallback ---
423
  def agent_node(state: AgentState):
 
334
  # --- Basic Agent Definition ---
335
  class BasicAgent:
336
  def __init__(self):
337
+ print("BasicAgent (LangGraph) initializing...")
338
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
339
+ if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
340
+
341
+ self.tools = defined_tools
342
+
343
+ # Build tool descriptions separately to avoid f-string backslash issues
344
+ tool_desc_list = []
345
+ for tool in self.tools:
346
+ if tool.name == 'code_interpreter':
347
+ desc = (f"- {tool.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles.\n"
348
+ f" **CODE INTERPRETER RULES:**\n"
349
+ f" 1. ALWAYS use `print()` for final results.\n"
350
+ f" 2. Write SIMPLE, single-step scripts.\n"
351
+ f" 3. PLAN your next script using plain text output first.\n"
352
+ f" 4. Write reasoning as Python comments (#) before code.\n"
353
+ f" 'pandas' (as pd) is available.")
354
+ else:
355
+ desc = f"- {tool.name}: {tool.description}"
356
+ tool_desc_list.append(desc)
357
+
358
+ tool_descriptions = "\n".join(tool_desc_list)
359
+
360
+ # ==================== SYSTEM PROMPT V5 (Improved) ====================
361
+ self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant for the GAIA benchmark.
362
+ Your goal is to provide the EXACT, concise, factual answer by strictly following a step-by-step reasoning process.
363
+
364
+ **CRITICAL PROTOCOL: YOU MUST FOLLOW THIS PROCESS**
365
+
366
+ 1. **ANALYZE:** Read the question carefully. Identify what format the answer should be in (number, yes/no, list, name, etc.).
367
+ 2. **PLAN (First Turn Only):** Your *first* response MUST be a brief plan in plain text:
368
+ - What information do you need?
369
+ - Which tool will you use first?
370
+ - What format should the final answer be in?
371
+ DO NOT call any tool on your first turn.
 
 
 
 
 
 
 
 
 
 
 
372
 
373
+ 3. **EXECUTE ONE TOOL:** Call exactly ONE tool per turn. Wait for the result before planning your next step.
374
+
375
+ 4. **VERIFY TOOL OUTPUT:**
376
+ - Read the ToolMessage carefully
377
+ - Check if it contains errors - if so, plan a different approach
378
+ - Check if you have enough information for the final answer
 
 
 
 
379
 
380
+ 5. **ITERATE OR FINISH:**
381
+ - **Need more info?** Write a brief plan (1-2 sentences) then call the next tool
382
+ - **Have the answer?** Call `final_answer_tool` immediately with the EXACT answer from the tool output
 
 
 
 
 
 
 
 
383
 
384
+ **CRITICAL RULES:**
385
+
386
+ * **ANSWER FORMAT:** Match the exact format requested (if question asks for a number, return ONLY the number; if it asks for a list, return ONLY the list)
387
+ * **NO HALLUCINATIONS:** The answer MUST come from tool outputs, NEVER from your training data
388
+ * **ONE TOOL PER TURN:** Never call multiple tools or make plans and tool calls in the same turn
389
+ * **USE CODE FOR LOGIC:** For ANY calculation, counting, or logical reasoning, use `code_interpreter` and ensure it prints the result
390
+ * **ERROR RECOVERY:** If a tool fails, analyze WHY and try a completely different approach
391
+ * **FINAL ANSWER FORMAT:** Strip ALL explanatory text. Examples:
392
+ - Question asks for number → Answer: "42" (not "The answer is 42" or "42 coins")
393
+ - Question asks for list → Answer: "apple, banana, cherry" (not "The list is: apple, banana, cherry")
394
+ - Question asks for yes/no → Answer: "Yes" or "No" (not "Yes, because...")
395
+
396
+ **TOOLS:**
397
+ {tool_descriptions}
398
+
399
+ **REMEMBER:**
400
+ - Use tools, don't guess
401
+ - One tool at a time
402
+ - Final answer must match requested format exactly
403
+ - No explanations in final answer
404
+ """
405
+
406
+ print("Initializing Groq LLM Endpoint...")
407
+ try:
408
+ chat_llm = ChatGroq(
409
+ temperature=0, # Changed from 0.01 to 0 for maximum determinism
410
+ groq_api_key=GROQ_API_KEY,
411
+ model_name="llama-3.3-70b-versatile", # Better model for reasoning
412
+ max_tokens=4096, # Explicit limit
413
+ timeout=60 # Add timeout for stability
414
+ )
415
+ print("✅ Groq LLM Endpoint initialized with llama-3.3-70b-versatile.")
416
+ except Exception as e:
417
+ print(f"Error initializing Groq: {e}")
418
+ raise
419
+
420
+ self.llm_with_tools = chat_llm.bind_tools(self.tools)
421
+ print("✅ Tools bound to LLM (using bind_tools).")
422
 
423
  # --- Agent Node with Robust Parsing Fallback ---
424
  def agent_node(state: AgentState):