gabejavitt committed on
Commit
821692a
·
verified ·
1 Parent(s): 1ee1bc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -85
app.py CHANGED
@@ -333,91 +333,91 @@ def should_continue(state: AgentState):
333
 
334
  # --- Basic Agent Definition ---
335
  class BasicAgent:
336
- def __init__(self):
337
- print("BasicAgent (LangGraph) initializing...")
338
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
339
- if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
340
-
341
- self.tools = defined_tools
342
-
343
- # Build tool descriptions separately to avoid f-string backslash issues
344
- tool_desc_list = []
345
- for tool in self.tools:
346
- if tool.name == 'code_interpreter':
347
- desc = (f"- {tool.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles.\n"
348
- f" **CODE INTERPRETER RULES:**\n"
349
- f" 1. ALWAYS use `print()` for final results.\n"
350
- f" 2. Write SIMPLE, single-step scripts.\n"
351
- f" 3. PLAN your next script using plain text output first.\n"
352
- f" 4. Write reasoning as Python comments (#) before code.\n"
353
- f" 'pandas' (as pd) is available.")
354
- else:
355
- desc = f"- {tool.name}: {tool.description}"
356
- tool_desc_list.append(desc)
357
-
358
- tool_descriptions = "\n".join(tool_desc_list)
359
-
360
- # ==================== SYSTEM PROMPT V4 ====================
361
- self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant for the GAIA benchmark.
362
- Your goal is to provide the concise, factual answer by strictly following a step-by-step reasoning process.
363
-
364
- **CRITICAL PROTOCOL: YOU MUST FOLLOW THIS PROCESS**
365
-
366
- 1. **ANALYZE:** Read the question and all messages in the history.
367
- 2. **MANDATORY FIRST STEP:** Your *first* response on *any* new task MUST be a plan in plain text. Do NOT call any tool on your first turn. Write down your logic, what you need, and which tool you *plan* to use next. Failure to provide a plan first will result in incorrect behavior.
368
- 3. **EXECUTE:** After submitting your plan, you will run again. Now, execute the *next* step of your plan by calling the *one* appropriate tool using the correct JSON format.
369
- 4. **ANALYZE TOOL OUTPUT:** You will receive a ToolMessage with the output. You MUST read it carefully.
370
- 5. **REPEAT or FINISH:**
371
- * **If more steps are needed:** Go back to step 1 (ANALYZE the new info & PLAN). Write an *updated* plan as plain text (e.g., "The search found X. My next step is to use code_interpreter to process X...").
372
- * **If the ToolMessage contains the final answer:** You MUST call the `final_answer_tool`. Your answer *must* be derived *only* from the ToolMessage output, not your own knowledge.
373
-
374
- **RULES:**
375
- * **NEVER** call a tool on the same turn you write a plan (plain text).
376
- * **NEVER** use your pre-trained "leaked" knowledge for the final answer. The answer *must* come from a ToolMessage (e.g., from `code_interpreter`'s print() or `search_tool`).
377
- * **NEVER** answer a logic puzzle from memory. You *must* use `code_interpreter`, ensure it `print()`s the result, analyze that output, and then use that printed result for `final_answer_tool`.
378
- * **NEVER** call `final_answer_tool` until a tool has explicitly given you the answer in its output.
379
- * **Error Handling:** If a tool call returns an Error, your next step (Step 1 PLAN) MUST analyze the error message and propose a *different* approach (different tool, different arguments, different logic). Do not retry the exact same failed call.
380
-
381
- **TOOLS:**
382
- {tool_descriptions}
383
-
384
- **TOOL FORMAT (JSON ONLY):**
385
- Respond ONLY with a single JSON block like this when calling a tool:
386
- ```json
387
- {{
388
- "tool": "tool_name",
389
- "tool_input": {{ "arg_name1": "value1", ... }}
390
- }}
391
- ```
392
- * Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
393
- * Do not add any text before or after the JSON block.
394
-
395
- Example for final_answer_tool:
396
- ```json
397
- {{
398
- "tool": "final_answer_tool",
399
- "tool_input": {{
400
- "answer": "The final answer string here"
401
- }}
402
- }}
403
- ```
404
- NOTE: The value for "answer" MUST be a string enclosed in double quotes.
405
- """
406
-
407
- print("Initializing Groq LLM Endpoint...")
408
- try:
409
- chat_llm = ChatGroq(
410
- temperature=0.01, # Low temperature for factual tasks
411
- groq_api_key=GROQ_API_KEY,
412
- model_name="openai/gpt-oss-120b" # <-- Switched Model
413
- )
414
- print("✅ Groq LLM Endpoint initialized with openai/gpt-oss-120b.")
415
- except Exception as e:
416
- print(f"Error initializing Groq: {e}")
417
- raise
418
-
419
- self.llm_with_tools = chat_llm.bind_tools(self.tools)
420
- print("✅ Tools bound to LLM (using bind_tools).")
421
 
422
  # --- Agent Node with Robust Parsing Fallback ---
423
  def agent_node(state: AgentState):
 
333
 
334
  # --- Basic Agent Definition ---
335
  class BasicAgent:
336
+ def __init__(self):
337
+ print("BasicAgent (LangGraph) initializing...")
338
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
339
+ if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
340
+
341
+ self.tools = defined_tools
342
+
343
+ # Build tool descriptions separately to avoid f-string backslash issues
344
+ tool_desc_list = []
345
+ for tool in self.tools:
346
+ if tool.name == 'code_interpreter':
347
+ desc = (f"- {tool.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles.\n"
348
+ f" **CODE INTERPRETER RULES:**\n"
349
+ f" 1. ALWAYS use `print()` for final results.\n"
350
+ f" 2. Write SIMPLE, single-step scripts.\n"
351
+ f" 3. PLAN your next script using plain text output first.\n"
352
+ f" 4. Write reasoning as Python comments (#) before code.\n"
353
+ f" 'pandas' (as pd) is available.")
354
+ else:
355
+ desc = f"- {tool.name}: {tool.description}"
356
+ tool_desc_list.append(desc)
357
+
358
+ tool_descriptions = "\n".join(tool_desc_list)
359
+
360
+ # ==================== SYSTEM PROMPT V4 ====================
361
+ self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant for the GAIA benchmark.
362
+ Your goal is to provide the concise, factual answer by strictly following a step-by-step reasoning process.
363
+
364
+ **CRITICAL PROTOCOL: YOU MUST FOLLOW THIS PROCESS**
365
+
366
+ 1. **ANALYZE:** Read the question and all messages in the history.
367
+ 2. **MANDATORY FIRST STEP:** Your *first* response on *any* new task MUST be a plan in plain text. Do NOT call any tool on your first turn. Write down your logic, what you need, and which tool you *plan* to use next. Failure to provide a plan first will result in incorrect behavior.
368
+ 3. **EXECUTE:** After submitting your plan, you will run again. Now, execute the *next* step of your plan by calling the *one* appropriate tool using the correct JSON format.
369
+ 4. **ANALYZE TOOL OUTPUT:** You will receive a ToolMessage with the output. You MUST read it carefully.
370
+ 5. **REPEAT or FINISH:**
371
+ * **If more steps are needed:** Go back to step 1 (ANALYZE the new info & PLAN). Write an *updated* plan as plain text (e.g., "The search found X. My next step is to use code_interpreter to process X...").
372
+ * **If the ToolMessage contains the final answer:** You MUST call the `final_answer_tool`. Your answer *must* be derived *only* from the ToolMessage output, not your own knowledge.
373
+
374
+ **RULES:**
375
+ * **NEVER** call a tool on the same turn you write a plan (plain text).
376
+ * **NEVER** use your pre-trained "leaked" knowledge for the final answer. The answer *must* come from a ToolMessage (e.g., from `code_interpreter`'s print() or `search_tool`).
377
+ * **NEVER** answer a logic puzzle from memory. You *must* use `code_interpreter`, ensure it `print()`s the result, analyze that output, and then use that printed result for `final_answer_tool`.
378
+ * **NEVER** call `final_answer_tool` until a tool has explicitly given you the answer in its output.
379
+ * **Error Handling:** If a tool call returns an Error, your next step (Step 1 PLAN) MUST analyze the error message and propose a *different* approach (different tool, different arguments, different logic). Do not retry the exact same failed call.
380
+
381
+ **TOOLS:**
382
+ {tool_descriptions}
383
+
384
+ **TOOL FORMAT (JSON ONLY):**
385
+ Respond ONLY with a single JSON block like this when calling a tool:
386
+ ```json
387
+ {{
388
+ "tool": "tool_name",
389
+ "tool_input": {{ "arg_name1": "value1", ... }}
390
+ }}
391
+ ```
392
+ * Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
393
+ * Do not add any text before or after the JSON block.
394
+
395
+ Example for final_answer_tool:
396
+ ```json
397
+ {{
398
+ "tool": "final_answer_tool",
399
+ "tool_input": {{
400
+ "answer": "The final answer string here"
401
+ }}
402
+ }}
403
+ ```
404
+ NOTE: The value for "answer" MUST be a string enclosed in double quotes.
405
+ """
406
+
407
+ print("Initializing Groq LLM Endpoint...")
408
+ try:
409
+ chat_llm = ChatGroq(
410
+ temperature=0.01, # Low temperature for factual tasks
411
+ groq_api_key=GROQ_API_KEY,
412
+ model_name="openai/gpt-oss-120b" # <-- Switched Model
413
+ )
414
+ print("✅ Groq LLM Endpoint initialized with openai/gpt-oss-120b.")
415
+ except Exception as e:
416
+ print(f"Error initializing Groq: {e}")
417
+ raise
418
+
419
+ self.llm_with_tools = chat_llm.bind_tools(self.tools)
420
+ print("✅ Tools bound to LLM (using bind_tools).")
421
 
422
  # --- Agent Node with Robust Parsing Fallback ---
423
  def agent_node(state: AgentState):