gabejavitt commited on
Commit
9f84911
·
verified ·
1 Parent(s): 7e7c48d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -338
app.py CHANGED
@@ -12,7 +12,7 @@ import json
12
  import re
13
  import uuid
14
  import time
15
- import ast
16
 
17
  # --- Pydantic Import ---
18
  from pydantic import BaseModel, Field
@@ -63,7 +63,7 @@ except Exception as e:
63
  agent = None
64
 
65
  # ====================================================
66
- # --- Tool Definitions ---
67
 
68
  class SearchInput(BaseModel):
69
  query: str = Field(description="The search query.")
@@ -465,89 +465,65 @@ def remove_fences_simple(text):
465
  return text
466
  return original_text
467
 
 
 
468
  def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
469
  """
470
  Parses malformed tool call strings (dribbled) from an LLM response.
471
-
472
- Tries two strategies:
473
- 1. <function(tool_name)>{json}</function> format
474
- 2. Bare JSON with tool name inference
475
-
476
- Args:
477
- content: Raw text string from LLM response
478
- tools: List of valid tool definitions for validation
479
-
480
- Returns:
481
- List containing a ToolCall object if parsing succeeded, empty list otherwise
482
  """
483
-
484
- def extract_json_with_balanced_braces(text: str) -> str:
485
- """Extract first complete JSON object using balanced brace counting."""
486
- start_idx = text.find('{')
487
- if start_idx == -1:
488
- return ""
489
-
490
- brace_count = 0
491
- in_string = False
492
- escape_next = False
493
-
494
- for i in range(start_idx, len(text)):
495
- char = text[i]
496
-
497
- if escape_next:
498
- escape_next = False
499
- continue
500
-
501
- if char == '\\':
502
- escape_next = True
503
- continue
504
-
505
- if char == '"':
506
- in_string = not in_string
507
- continue
508
-
509
- if not in_string:
510
- if char == '{':
511
- brace_count += 1
512
- elif char == '}':
513
- brace_count -= 1
514
- if brace_count == 0:
515
- return text[start_idx:i+1]
516
-
517
- return ""
518
-
519
  tool_name = None
520
  tool_input = None
521
-
522
- print(f"Original LLM content for fallback parsing:\n---\n{content[:500]}...\n---")
523
-
524
- # ========================================================================
525
- # STRATEGY 1: Try to parse <function(tool_name)>{json}</function> format
526
- # ========================================================================
527
- REGEX_STRING_FOR_FUNCTION = r"<function\(([^)]+)\)>"
528
-
529
- func_match = re.search(REGEX_STRING_FOR_FUNCTION, content, re.IGNORECASE)
530
 
531
  if func_match:
532
  try:
533
- tool_name = func_match.group(1).strip()
534
-
535
- # Extract JSON starting after the function tag
536
- json_start = func_match.end()
537
- remaining_content = content[json_start:]
538
- json_str = extract_json_with_balanced_braces(remaining_content)
539
 
540
- if json_str:
541
- tool_input = json.loads(json_str)
542
- print(f"🔧 Fallback (Format 1): Parsed tool call for '{tool_name}'")
 
 
 
 
 
 
 
543
  else:
544
- print(f"⚠️ Fallback (Format 1): Found <function> but no valid JSON")
545
  tool_name = None
546
 
547
  except json.JSONDecodeError as e:
548
- print(f"⚠️ Fallback (Format 1): Failed to parse JSON: {e}")
549
- tool_name = None
550
- tool_input = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
 
552
  # ========================================================================
553
  # STRATEGY 2: Try to parse bare JSON (if Strategy 1 failed)
@@ -672,32 +648,52 @@ defined_tools = [
672
  ]
673
 
674
 
675
- # --- LangGraph Agent State ---
676
  class AgentState(TypedDict):
677
  messages: Annotated[List[AnyMessage], add_messages]
678
- plan: List[str] # A list of steps to execute
679
  turn: int
680
 
681
 
682
- # --- Conditional Edge Function ---
683
- def route_from_planner(state: AgentState):
684
  """
685
- Routes to the executor if a plan exists, or ends the graph if the plan is complete.
686
  """
687
- plan = state.get('plan', [])
688
- if plan:
689
- print("--- Condition: Plan has steps. Routing to executor. ---")
690
- return "executor"
691
- else:
692
- print("--- Condition: Plan is empty. Ending. ---")
 
 
 
 
 
 
 
693
  return END
694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
 
696
  # ====================================================
697
- # --- Basic Agent Class ---
698
  class BasicAgent:
699
  def __init__(self):
700
- print("BasicAgent (Planner-Executor) initializing...")
701
 
702
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
703
  if not GROQ_API_KEY:
@@ -725,7 +721,6 @@ class BasicAgent:
725
  # Build tool descriptions
726
  tool_desc_list = []
727
  for tool in self.tools:
728
- # Use Pydantic schema if available for richer descriptions
729
  if tool.args_schema:
730
  schema = tool.args_schema.model_json_schema()
731
  args_desc = []
@@ -738,20 +733,19 @@ class BasicAgent:
738
  desc = f"- {tool.name}: {tool.description}"
739
  tool_desc_list.append(desc)
740
  tool_descriptions = "\n".join(tool_desc_list)
741
- tool_names_list = [t.name for t in self.tools]
742
- tool_names_str = ", ".join(tool_names_list)
743
-
744
  # ==================== SYSTEM PROMPT V7 (Simplified) ====================
 
745
  self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
746
  Your goal: Provide the EXACT answer in the EXACT format requested.
747
 
748
  **PROTOCOL:**
749
 
750
- 1. **ANALYZE:** Read the question. What info is needed? What is the answer format?
751
- 2. **ACT:** Call ONE tool to get information.
752
  3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
753
- - **If NO:** Go back to Step 2.
754
- - **If YES:** Call final_answer_tool immediately.
755
 
756
  **CRITICAL RULES:**
757
 
@@ -766,319 +760,165 @@ Your goal: Provide the EXACT answer in the EXACT format requested.
766
  **EXAMPLE: FINAL ANSWER**
767
  {{ "name": "final_answer_tool", "arguments": {{"answer": "28"}} }}
768
 
769
- **EXAMPLE: RAG SCRAPER**
770
- {{ "name": "scrape_and_retrieve", "arguments": {{"url": "https://example.com", "query": "what is X?"}} }}
771
-
772
- **CRITICAL TOOL CALLING FORMAT:**
773
- When calling tools, you MUST use this EXACT JSON format:
774
- {{"name": "tool_name", "arguments": {{"param": "value"}}}}
775
-
776
- NEVER use XML format like <function(...)>.
777
- NEVER include tool name in arguments.
778
-
779
- **AVAILABLE TOOLS:**
780
  {tool_descriptions}
781
 
782
-
783
- **REMEMBER:** Use tools. Format JSON correctly.
784
  """
785
 
786
- print("Initializing Groq LLMs...")
787
  try:
788
- # LLM 1: The Executor (binds to tools)
789
- self.executor_llm = ChatGroq(
790
  temperature=0,
791
  groq_api_key=GROQ_API_KEY,
792
- model_name="openai/gpt-oss-120b",
793
  max_tokens=4096,
794
- timeout=60,
795
- #model_kwargs={"response_format": {"type": "json_object"}} # Force JSON
796
  ).bind_tools(self.tools)
797
- print("✅ Executor LLM (with tools) initialized.")
798
 
799
- # LLM 2: The Planner (no tools, just reasoning)
800
- self.planner_llm = ChatGroq(
801
- temperature=0,
802
- groq_api_key=GROQ_API_KEY,
803
- model_name="openai/gpt-oss-120b",
804
- max_tokens=4096,
805
- timeout=60
806
- ).bind(tool_choice="none")
807
- print("✅ Planner LLM (no tools) initialized.")
808
  except Exception as e:
809
  print(f"❌ Error initializing Groq: {e}")
810
  raise
811
-
812
- # --- Define Planner Prompt ---
813
- self.planner_prompt = """You are a planning assistant. Your ONLY job is to output a Python list.
814
-
815
- AVAILABLE TOOLS:
816
- {tool_names}
817
-
818
- Original Question: {{original_question}}
819
-
820
- Recent History:
821
- {{history}}
822
-
823
- INSTRUCTIONS:
824
- 1. Check if the task is complete (look for final_answer_tool in history)
825
- - If YES: Output []
826
- - If NO: Create 1-2 next steps
827
-
828
- 2. Each step MUST use one of these EXACT tool names:
829
- - search_tool (for web searches)
830
- - code_interpreter (for calculations, data processing)
831
- - scrape_and_retrieve (for specific webpage content)
832
- - read_file (to read uploaded files)
833
- - final_answer_tool (when you have the final answer)
834
-
835
- 3. Format: "Use [exact_tool_name] to [specific action]"
836
-
837
- EXAMPLES:
838
- ["Use search_tool to find information about porterhouse steak"]
839
- ["Use code_interpreter to calculate 15 factorial"]
840
- ["Use scrape_and_retrieve to extract recipe from Reddit"]
841
- ["Use final_answer_tool to submit the answer"]
842
- []
843
-
844
- CRITICAL: Use ONLY the tools listed above. Output ONLY the list.
845
-
846
- Your response:"""
847
-
848
- # Store tool names in the prompt
849
- self.planner_prompt = self.planner_prompt.format(
850
- tool_names=tool_names_str)
851
 
852
- # --- Node 1: The Planner ---
853
- def planner_node(state: AgentState):
854
  current_turn = state.get('turn', 0) + 1
855
  print(f"\n{'='*60}")
856
- print(f"PLANNER TURN {current_turn}/{MAX_TURNS}")
857
  print('='*60)
858
-
859
- if current_turn > MAX_TURNS:
860
- print("--- Max turns reached. Ending. ---")
861
- return {"plan": [], "turn": current_turn}
862
-
863
- # DON'T pass along existing plan - always replan!
864
-
865
- # Get last 10 messages for context
866
- recent_messages = state['messages'][-10:]
867
- history_str = "\n".join([
868
- f"{msg.__class__.__name__}: {str(msg.content)[:200]}..."
869
- for msg in recent_messages
870
- ])
871
-
872
- # Extract original question
873
- original_question = next(
874
- (msg.content for msg in state['messages'] if isinstance(msg, HumanMessage)),
875
- "Unknown question"
876
- )
877
-
878
- # Check if final_answer_tool was called
879
- for msg in reversed(state['messages']):
880
- if isinstance(msg, AIMessage) and msg.tool_calls:
881
- if any(tc.get('name') == 'final_answer_tool' for tc in msg.tool_calls):
882
- print("✅ Final answer detected. Ending.")
883
- return {"plan": [], "turn": current_turn}
884
-
885
- # Format prompt
886
- prompt = self.planner_prompt.format(
887
- original_question=original_question,
888
- history=history_str
889
- )
890
-
891
- # Call planner LLM
892
- try:
893
- response = self.planner_llm.invoke(prompt)
894
- plan_str = response.content
895
- print(f"Raw planner output: {plan_str[:300]}...")
896
- except Exception as e:
897
- print(f"⚠️ Planner LLM failed: {e}")
898
- return {"plan": [], "turn": current_turn}
899
-
900
- # Parse plan with multiple strategies
901
- plan_list = []
902
-
903
- # Strategy 1: Try to find a list in the output
904
- match = re.search(r'\[([^\]]*)\]', plan_str, re.DOTALL)
905
- if match:
906
- try:
907
- list_str = '[' + match.group(1) + ']'
908
- # Clean up common issues
909
- list_str = list_str.replace('\n', ' ')
910
- list_str = re.sub(r'\s+', ' ', list_str) # Normalize whitespace
911
-
912
- parsed = json.loads(list_str)
913
- if isinstance(parsed, list) and all(isinstance(x, str) for x in parsed):
914
- plan_list = parsed
915
- print(f"✅ Parsed plan: {plan_list}")
916
- except json.JSONDecodeError:
917
- print(f"⚠️ Failed to parse as JSON")
918
-
919
- # Strategy 2: Look for quoted strings if JSON parsing failed
920
- if not plan_list:
921
- quoted_strings = re.findall(r'"([^"]+)"', plan_str)
922
- if quoted_strings and len(quoted_strings) <= 5:
923
- # Check if they look like tool steps
924
- valid_steps = []
925
- for s in quoted_strings:
926
- if any(tool.name in s.lower() for tool in self.tools):
927
- valid_steps.append(s)
928
- if valid_steps:
929
- plan_list = valid_steps
930
- print(f"✅ Extracted steps from quotes: {plan_list}")
931
-
932
- # Validate plan
933
- if plan_list:
934
- # Remove any non-descriptive or invalid steps
935
- validated_plan = []
936
- for step in plan_list:
937
- step_lower = step.lower().strip()
938
-
939
- # Check if step mentions ANY tool
940
- mentioned_tool = None
941
- for tool in self.tools:
942
- if tool.name.lower() in step_lower:
943
- mentioned_tool = tool.name
944
- break
945
-
946
- if mentioned_tool:
947
- # Valid step - has a real tool name
948
- validated_plan.append(step)
949
- print(f"✅ Accepted step: '{step}' (uses {mentioned_tool})")
950
- else:
951
- # Invalid - no real tool mentioned
952
- print(f"❌ Rejected step: '{step}' (no valid tool name found)")
953
-
954
- plan_list = validated_plan
955
-
956
- if not plan_list:
957
- print("⚠️ No valid plan generated. Ending.")
958
 
959
- print(f"📋 Final Plan: {plan_list}")
960
- return {"plan": plan_list, "turn": current_turn}
 
961
 
962
- # --- Node 2: The Executor ---
963
- def executor_node(state: AgentState):
964
- print(f"\n--- EXECUTOR ---")
965
-
966
- plan = state.get('plan', [])
967
- if not plan:
968
- print("⚠️ No plan to execute!")
969
- return {"messages": [], "plan": []}
970
-
971
- current_step = plan[0]
972
- print(f"Executing Step: {current_step}")
973
-
974
- # Build executor message
975
- executor_messages = state['messages'] + [
976
- HumanMessage(content=f"""Execute: {current_step}
977
-
978
- Available tools: search_tool, code_interpreter, scrape_and_retrieve, final_answer_tool
979
-
980
- Call ONE tool in JSON format: {{"name": "tool_name", "arguments": {{...}}}}""")
981
- ]
982
-
983
- # Try to call LLM
984
  max_retries = 3
985
  ai_message = None
986
  for attempt in range(max_retries):
987
  try:
988
- ai_message = self.executor_llm.invoke(executor_messages)
 
989
  break
990
  except Exception as e:
991
- print(f"⚠️ Executor LLM attempt {attempt+1}/{max_retries} failed: {e}")
992
  if attempt == max_retries - 1:
993
- ai_message = AIMessage(content=f"Error: Executor LLM failed: {e}")
 
 
994
  time.sleep(2 ** attempt)
995
 
996
- # Fallback parsing
997
  if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
998
  parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
999
  if parsed_tool_calls:
1000
- print("🔧 Fallback SUCCESS: Rebuilt tool call")
1001
  ai_message.tool_calls = parsed_tool_calls
1002
- ai_message.content = ""
1003
  else:
1004
- print(f"⚠️ Fallback FAILED")
1005
 
1006
  if ai_message.tool_calls:
1007
- print(f"🔧 Tool Call: {ai_message.tool_calls[0]['name']}")
1008
  else:
1009
- print("⚠️ No tool call generated")
1010
 
1011
- # IMPORTANT: Clear the plan so planner creates a new one
1012
- return {"messages": [ai_message], "plan": []}
1013
 
1014
  # --- Tool Node ---
1015
  tool_node = ToolNode(self.tools)
1016
 
1017
  # --- Build Graph ---
1018
- print("Building Planner-Executor graph...")
1019
  graph_builder = StateGraph(AgentState)
1020
 
1021
- graph_builder.add_node("planner", planner_node)
1022
- graph_builder.add_node("executor", executor_node)
1023
  graph_builder.add_node("tools", tool_node)
1024
 
1025
- graph_builder.add_edge(START, "planner")
1026
 
1027
  graph_builder.add_conditional_edges(
1028
- "planner",
1029
- route_from_planner,
1030
  {
1031
- "executor": "executor",
 
1032
  END: END
1033
  }
1034
  )
1035
 
1036
- graph_builder.add_edge("executor", "tools")
1037
- graph_builder.add_edge("tools", "planner") # Loop back to planner
1038
 
1039
  self.graph = graph_builder.compile()
1040
- print("✅ Planner-Executor graph compiled successfully.")
1041
 
1042
  def __call__(self, question: str) -> str:
1043
  print(f"\n--- Starting Agent Run for Question ---")
1044
- print(f"Question: {question[:100]}...")
1045
 
 
1046
  graph_input = {
1047
  "messages": [
1048
  SystemMessage(content=self.system_prompt),
1049
  HumanMessage(content=question)
1050
  ],
1051
- "plan": [],
1052
  "turn": 0
1053
  }
1054
 
1055
  final_answer = "AGENT FAILED TO PRODUCE ANSWER"
1056
  try:
1057
- config = {"recursion_limit": 50} # Increased from 25
1058
-
1059
  for event in self.graph.stream(graph_input, stream_mode="values", config=config):
1060
- last_message = event["messages"][-1]
 
 
 
 
1061
 
1062
- # Check for final answer
1063
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
1064
- for tc in last_message.tool_calls:
1065
- if tc.get("name") == "final_answer_tool":
1066
- final_answer = tc['args'].get('answer', "ERROR")
1067
- print(f"✅ Final Answer: '{final_answer}'")
1068
- break
1069
-
1070
- if final_answer != "AGENT FAILED TO PRODUCE ANSWER":
1071
- break
1072
-
1073
- # Clean answer
 
 
 
 
 
 
 
 
 
 
1074
  cleaned_answer = str(final_answer).strip()
1075
- print(f"Returning: '{cleaned_answer}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
1076
  return cleaned_answer
1077
-
1078
  except Exception as e:
1079
- print(f"Error: {e}")
1080
- traceback.print_exc()
1081
- return f"AGENT ERROR: {e}"
 
1082
 
1083
 
1084
  # ====================================================
@@ -1095,7 +935,7 @@ except Exception as e:
1095
 
1096
  # ====================================================
1097
  # --- (Original Template Code - Mock Questions Version) ---
1098
- def run_and_submit_all( profile: gr.OAuthProfile | None):
1099
  """
1100
  Fetches MOCK questions, runs the BasicAgent on them, simulates submission prep,
1101
  and displays the results. DOES NOT SUBMIT.
@@ -1109,7 +949,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1109
  return "FATAL ERROR: Global agent failed to initialize. Check logs.", None
1110
 
1111
  print("Using globally instantiated agent.")
1112
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
1113
  print(f"Agent code URL: {agent_code}")
1114
  print("--- USING MOCK QUESTIONS ---")
1115
 
@@ -1198,9 +1038,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1198
  "task_id": "mock_level1_020",
1199
  "question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
1200
  }
1201
- #
1202
- # ^^^ PASTE YOUR FULL LIST OF 20 MOCK QUESTIONS HERE ^^^
1203
- #
1204
  ]
1205
 
1206
  questions_data = mock_questions_data
@@ -1237,7 +1074,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1237
 
1238
  status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
1239
  print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
1240
- final_status = "--- MOK RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED."
1241
  results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
1242
  return final_status, results_df
1243
 
@@ -1247,7 +1084,7 @@ with gr.Blocks() as demo:
1247
  gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
1248
  gr.Markdown("""
1249
  **Instructions:** Click 'Run Mock Evaluation'.
1250
- **Notes:** Uses Groq (Llama 3.1 8B). Ensure `GROQ_API_KEY` secret/env var exists. **DOES NOT** fetch official Qs or submit. Check logs for details.
1251
  """)
1252
  gr.LoginButton()
1253
  run_button = gr.Button("Run Mock Evaluation")
@@ -1257,7 +1094,7 @@ with gr.Blocks() as demo:
1257
 
1258
  if __name__ == "__main__":
1259
  print("\n" + "-"*30 + " App Starting " + "-"*30)
1260
- space_host_startup = os.getenv("SPACE_ID"); space_id_startup = os.getenv("SPACE_ID")
1261
  if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
1262
  else: print("ℹ️ No SPACE_HOST (local?).")
1263
  if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -1272,3 +1109,4 @@ if __name__ == "__main__":
1272
  print("Launching Gradio Interface...")
1273
  demo.queue().launch(debug=True, share=False)
1274
 
 
 
12
  import re
13
  import uuid
14
  import time
15
+ import ast # <-- Import ast module
16
 
17
  # --- Pydantic Import ---
18
  from pydantic import BaseModel, Field
 
63
  agent = None
64
 
65
  # ====================================================
66
+ # --- Tool Definitions (Unchanged) ---
67
 
68
  class SearchInput(BaseModel):
69
  query: str = Field(description="The search query.")
 
465
  return text
466
  return original_text
467
 
468
+
469
+ # --- *** ROBUST FALLBACK PARSER *** ---
470
  def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
471
  """
472
  Parses malformed tool call strings (dribbled) from an LLM response.
 
 
 
 
 
 
 
 
 
 
 
473
  """
474
+ print(f"Original LLM content for fallback parsing:\n---\n{content}\n---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
  tool_name = None
476
  tool_input = None
477
+ cleaned_str = None # For storing cleaned string before parsing
478
+
479
+ # STRATEGY 1: Try to parse <function(tool_name)>...{json_string}...
480
+ # This also handles <function=tool_name>...{json_string}...
481
+ func_match = re.search(
482
+ r"<function[(=]\s*([^)]+)\s*[)>](.*)", # <-- More robust regex
483
+ content,
484
+ re.DOTALL | re.IGNORECASE
485
+ )
486
 
487
  if func_match:
488
  try:
489
+ tool_name = func_match.group(1).strip().replace("'", "").replace('"', '') # Clean tool name
490
+ remaining_content = func_match.group(2)
 
 
 
 
491
 
492
+ json_start_index = remaining_content.find('{')
493
+ if json_start_index != -1:
494
+ json_str = remaining_content[json_start_index:]
495
+ # --- Aggressive Cleaning ---
496
+ cleaned_str = json_str.strip()
497
+ cleaned_str = ''.join(c for c in cleaned_str if c.isprintable() or c in '\n\r\t')
498
+ cleaned_str = cleaned_str.strip().rstrip(',')
499
+
500
+ tool_input = json.loads(cleaned_str)
501
+ print(f"🔧 Fallback (Format 1 - json.loads): Parsed tool call for '{tool_name}'")
502
  else:
503
+ print(f"⚠️ Fallback (Format 1): Found <function> but no JSON blob.")
504
  tool_name = None
505
 
506
  except json.JSONDecodeError as e:
507
+ print(f"⚠️ Fallback (Format 1): json.loads failed after cleaning: {e}. Trying ast.literal_eval.")
508
+ try:
509
+ # Secondary attempt with ast.literal_eval
510
+ if cleaned_str:
511
+ potential_input = ast.literal_eval(cleaned_str)
512
+ if isinstance(potential_input, dict):
513
+ tool_input = potential_input
514
+ print(f"🔧 Fallback (Format 1 - ast.literal_eval): Parsed tool call for '{tool_name}'")
515
+ else:
516
+ print(f"⚠️ Fallback (Format 1): ast.literal_eval did not produce a dict.")
517
+ tool_name = None
518
+ else:
519
+ tool_name = None
520
+
521
+ except (SyntaxError, ValueError) as ast_e:
522
+ print(f"⚠️ Fallback (Format 1): ast.literal_eval also failed: {ast_e}")
523
+ tool_name = None
524
+ except Exception as e_inner:
525
+ print(f"⚠️ Fallback (Format 1): Unexpected error during ast.literal_eval: {e_inner}")
526
+ tool_name = None
527
 
528
  # ========================================================================
529
  # STRATEGY 2: Try to parse bare JSON (if Strategy 1 failed)
 
648
  ]
649
 
650
 
651
+ # --- *** NEW: Reverted AgentState *** ---
652
  class AgentState(TypedDict):
653
  messages: Annotated[List[AnyMessage], add_messages]
 
654
  turn: int
655
 
656
 
657
+ # --- *** NEW: Reverted Conditional Edge Function *** ---
658
+ def should_continue(state: AgentState):
659
  """
660
+ Decide whether to continue, call tools, or end.
661
  """
662
+ last_message = state['messages'][-1]
663
+ current_turn = state.get('turn', 0)
664
+
665
+ # 1. Check for final_answer_tool
666
+ if isinstance(last_message, AIMessage) and last_message.tool_calls:
667
+ for tool_call in last_message.tool_calls:
668
+ if tool_call.get("name") == "final_answer_tool":
669
+ print("--- Condition: final_answer_tool called, ending. ---")
670
+ return END
671
+
672
+ # 2. Check turn limit
673
+ if current_turn >= MAX_TURNS:
674
+ print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
675
  return END
676
 
677
+ # 3. Route to tools if tool calls exist
678
+ if isinstance(last_message, AIMessage) and last_message.tool_calls:
679
+ print("--- Condition: Tools called, routing to tools node. ---")
680
+ return "tools"
681
+
682
+ # 4. Loop prevention
683
+ if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
684
+ print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
685
+ return END
686
+
687
+ # 5. Loop back to agent (reasoning/planning step)
688
+ print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
689
+ return "agent"
690
+
691
 
692
  # ====================================================
693
+ # --- *** NEW: Reverted Basic Agent Class *** ---
694
  class BasicAgent:
695
  def __init__(self):
696
+ print("BasicAgent (Single LLM) initializing...")
697
 
698
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
699
  if not GROQ_API_KEY:
 
721
  # Build tool descriptions
722
  tool_desc_list = []
723
  for tool in self.tools:
 
724
  if tool.args_schema:
725
  schema = tool.args_schema.model_json_schema()
726
  args_desc = []
 
733
  desc = f"- {tool.name}: {tool.description}"
734
  tool_desc_list.append(desc)
735
  tool_descriptions = "\n".join(tool_desc_list)
736
+
 
 
737
  # ==================== SYSTEM PROMPT V7 (Simplified) ====================
738
+ # This prompt is for a single, powerful agent
739
  self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
740
  Your goal: Provide the EXACT answer in the EXACT format requested.
741
 
742
  **PROTOCOL:**
743
 
744
+ 1. **ANALYZE:** Read the question and history. What is the next logical step?
745
+ 2. **ACT:** Call ONE tool to get information or perform a calculation.
746
  3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
747
+ - **If NO:** Go back to Step 1 and decide the *next* step.
748
+ - **If YES:** Call final_answer_tool immediately with the answer.
749
 
750
  **CRITICAL RULES:**
751
 
 
760
  **EXAMPLE: FINAL ANSWER**
761
  {{ "name": "final_answer_tool", "arguments": {{"answer": "28"}} }}
762
 
763
+ **TOOLS:**
 
 
 
 
 
 
 
 
 
 
764
  {tool_descriptions}
765
 
766
+ **REMEMBER:** One step at a time. Use tools. Format JSON correctly.
 
767
  """
768
 
769
+ print("Initializing Groq LLM...")
770
  try:
771
+ # --- Initialize ONE Powerful LLM for all tasks ---
772
+ self.llm_with_tools = ChatGroq(
773
  temperature=0,
774
  groq_api_key=GROQ_API_KEY,
775
+ model_name="llama-3.3-70b-versatile", # <-- Use the powerful model
776
  max_tokens=4096,
777
+ timeout=60
 
778
  ).bind_tools(self.tools)
779
+ print("✅ Main LLM (llama-3.3-70b-versatile with tools) initialized.")
780
 
 
 
 
 
 
 
 
 
 
781
  except Exception as e:
782
  print(f"❌ Error initializing Groq: {e}")
783
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
 
785
+ # --- Node 1: The Agent ---
786
+ def agent_node(state: AgentState):
787
  current_turn = state.get('turn', 0) + 1
788
  print(f"\n{'='*60}")
789
+ print(f"AGENT TURN {current_turn}/{MAX_TURNS}")
790
  print('='*60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
 
792
+ # Note: Max turns is also checked in should_continue, but good to have here
793
+ if current_turn > MAX_TURNS:
794
+ return {"messages": [SystemMessage(content="Max turns reached.")]}
795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
  max_retries = 3
797
  ai_message = None
798
  for attempt in range(max_retries):
799
  try:
800
+ # Call the single, powerful LLM
801
+ ai_message = self.llm_with_tools.invoke(state["messages"])
802
  break
803
  except Exception as e:
804
+ print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {e}")
805
  if attempt == max_retries - 1:
806
+ ai_message = AIMessage(
807
+ content=f"Error: LLM failed after {max_retries} attempts: {e}"
808
+ )
809
  time.sleep(2 ** attempt)
810
 
811
+ # --- Fallback Parsing Logic ---
812
  if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
813
  parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
814
  if parsed_tool_calls:
815
+ print("🔧 Fallback SUCCESS: Rebuilding tool call(s).")
816
  ai_message.tool_calls = parsed_tool_calls
817
+ ai_message.content = "" # Clear the text content
818
  else:
819
+ print(f"⚠️ Fallback FAILED: Could not parse any tool call from content:\n{ai_message.content[:200]}...")
820
 
821
  if ai_message.tool_calls:
822
+ print(f"🔧 Agent Tool Call: {ai_message.tool_calls[0]['name']}")
823
  else:
824
+ print(f"💭 Agent Reasoning: {ai_message.content[:200]}...")
825
 
826
+ return {"messages": [ai_message], "turn": current_turn}
 
827
 
828
  # --- Tool Node ---
829
  tool_node = ToolNode(self.tools)
830
 
831
  # --- Build Graph ---
832
+ print("Building Single-Agent graph...")
833
  graph_builder = StateGraph(AgentState)
834
 
835
+ graph_builder.add_node("agent", agent_node)
 
836
  graph_builder.add_node("tools", tool_node)
837
 
838
+ graph_builder.add_edge(START, "agent")
839
 
840
  graph_builder.add_conditional_edges(
841
+ "agent",
842
+ should_continue, # Use the reverted conditional function
843
  {
844
+ "tools": "tools",
845
+ "agent": "agent", # For loop prevention
846
  END: END
847
  }
848
  )
849
 
850
+ graph_builder.add_edge("tools", "agent") # Loop back to agent
 
851
 
852
  self.graph = graph_builder.compile()
853
+ print("✅ Single-Agent graph compiled successfully.")
854
 
855
  def __call__(self, question: str) -> str:
856
  print(f"\n--- Starting Agent Run for Question ---")
857
+ print(f"Agent received question (first 100 chars): {question[:100]}...")
858
 
859
+ # --- Initialize Reverted AgentState (no plan) ---
860
  graph_input = {
861
  "messages": [
862
  SystemMessage(content=self.system_prompt),
863
  HumanMessage(content=question)
864
  ],
 
865
  "turn": 0
866
  }
867
 
868
  final_answer = "AGENT FAILED TO PRODUCE ANSWER"
869
  try:
870
+ config = {"recursion_limit": MAX_TURNS + 5}
 
871
  for event in self.graph.stream(graph_input, stream_mode="values", config=config):
872
+
873
+ if event.get('messages'): # Ensure messages exist
874
+ last_message = event["messages"][-1]
875
+ else:
876
+ continue # Skip if no messages yet
877
 
878
+ # Check for final answer extraction
879
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
880
+ if last_message.tool_calls[0].get("name") == "final_answer_tool":
881
+ final_answer_args = last_message.tool_calls[0].get('args', {})
882
+ if 'answer' in final_answer_args:
883
+ final_answer = final_answer_args['answer']
884
+ print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
885
+ break
886
+ else:
887
+ print(f"⚠️ Final Answer tool called without 'answer' argument: {final_answer_args}")
888
+ final_answer = "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER"
889
+ break
890
+
891
+ elif isinstance(last_message, ToolMessage):
892
+ print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
893
+ elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
894
+ print(f"AI Message (Reasoning): {last_message.content[:500]}...")
895
+ elif isinstance(last_message, SystemMessage):
896
+ print(f"System Message: {last_message.content[:500]}...")
897
+
898
+
899
+ # --- Final Answer Cleaning ---
900
  cleaned_answer = str(final_answer).strip()
901
+ prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
902
+ original_cleaned = cleaned_answer
903
+ for prefix in prefixes_to_remove:
904
+ if cleaned_answer.lower().startswith(prefix.lower()):
905
+ potential_answer = cleaned_answer[len(prefix):].strip()
906
+ if potential_answer:
907
+ cleaned_answer = potential_answer
908
+ break
909
+
910
+ cleaned_answer = remove_fences_simple(cleaned_answer)
911
+ if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
912
+ cleaned_answer = cleaned_answer[1:-1].strip()
913
+
914
+ print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
915
  return cleaned_answer
916
+
917
  except Exception as e:
918
+ print(f"Error running agent graph: {e}")
919
+ tb_str = traceback.format_exc()
920
+ print(tb_str)
921
+ return f"AGENT GRAPH ERROR: {e}"
922
 
923
 
924
  # ====================================================
 
935
 
936
  # ====================================================
937
  # --- (Original Template Code - Mock Questions Version) ---
938
+ def run_and_submit_all( profile: gr.OAuthProfile | None): # Corrected type hint
939
  """
940
  Fetches MOCK questions, runs the BasicAgent on them, simulates submission prep,
941
  and displays the results. DOES NOT SUBMIT.
 
949
  return "FATAL ERROR: Global agent failed to initialize. Check logs.", None
950
 
951
  print("Using globally instantiated agent.")
952
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run" # Corrected URL
953
  print(f"Agent code URL: {agent_code}")
954
  print("--- USING MOCK QUESTIONS ---")
955
 
 
1038
  "task_id": "mock_level1_020",
1039
  "question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
1040
  }
 
 
 
1041
  ]
1042
 
1043
  questions_data = mock_questions_data
 
1074
 
1075
  status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
1076
  print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
1077
+ final_status = "--- Mock RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED." # Corrected typo
1078
  results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
1079
  return final_status, results_df
1080
 
 
1084
  gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
1085
  gr.Markdown("""
1086
  **Instructions:** Click 'Run Mock Evaluation'.
1087
+ **Notes:** Uses Groq (Llama-3.3-70b Executor). Ensure `GROQ_API_KEY` secret/env var exists. **DOES NOT** fetch official Qs or submit. Check logs for details.
1088
  """)
1089
  gr.LoginButton()
1090
  run_button = gr.Button("Run Mock Evaluation")
 
1094
 
1095
  if __name__ == "__main__":
1096
  print("\n" + "-"*30 + " App Starting " + "-"*30)
1097
+ space_host_startup = os.getenv("SPACE_HOST"); space_id_startup = os.getenv("SPACE_ID") # Corrected variable name
1098
  if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
1099
  else: print("ℹ️ No SPACE_HOST (local?).")
1100
  if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
1109
  print("Launching Gradio Interface...")
1110
  demo.queue().launch(debug=True, share=False)
1111
 
1112
+