Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ import json
|
|
| 12 |
import re
|
| 13 |
import uuid
|
| 14 |
import time
|
| 15 |
-
import ast
|
| 16 |
|
| 17 |
# --- Pydantic Import ---
|
| 18 |
from pydantic import BaseModel, Field
|
|
@@ -63,7 +63,7 @@ except Exception as e:
|
|
| 63 |
agent = None
|
| 64 |
|
| 65 |
# ====================================================
|
| 66 |
-
# --- Tool Definitions ---
|
| 67 |
|
| 68 |
class SearchInput(BaseModel):
|
| 69 |
query: str = Field(description="The search query.")
|
|
@@ -465,89 +465,65 @@ def remove_fences_simple(text):
|
|
| 465 |
return text
|
| 466 |
return original_text
|
| 467 |
|
|
|
|
|
|
|
| 468 |
def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
|
| 469 |
"""
|
| 470 |
Parses malformed tool call strings (dribbled) from an LLM response.
|
| 471 |
-
|
| 472 |
-
Tries two strategies:
|
| 473 |
-
1. <function(tool_name)>{json}</function> format
|
| 474 |
-
2. Bare JSON with tool name inference
|
| 475 |
-
|
| 476 |
-
Args:
|
| 477 |
-
content: Raw text string from LLM response
|
| 478 |
-
tools: List of valid tool definitions for validation
|
| 479 |
-
|
| 480 |
-
Returns:
|
| 481 |
-
List containing a ToolCall object if parsing succeeded, empty list otherwise
|
| 482 |
"""
|
| 483 |
-
|
| 484 |
-
def extract_json_with_balanced_braces(text: str) -> str:
|
| 485 |
-
"""Extract first complete JSON object using balanced brace counting."""
|
| 486 |
-
start_idx = text.find('{')
|
| 487 |
-
if start_idx == -1:
|
| 488 |
-
return ""
|
| 489 |
-
|
| 490 |
-
brace_count = 0
|
| 491 |
-
in_string = False
|
| 492 |
-
escape_next = False
|
| 493 |
-
|
| 494 |
-
for i in range(start_idx, len(text)):
|
| 495 |
-
char = text[i]
|
| 496 |
-
|
| 497 |
-
if escape_next:
|
| 498 |
-
escape_next = False
|
| 499 |
-
continue
|
| 500 |
-
|
| 501 |
-
if char == '\\':
|
| 502 |
-
escape_next = True
|
| 503 |
-
continue
|
| 504 |
-
|
| 505 |
-
if char == '"':
|
| 506 |
-
in_string = not in_string
|
| 507 |
-
continue
|
| 508 |
-
|
| 509 |
-
if not in_string:
|
| 510 |
-
if char == '{':
|
| 511 |
-
brace_count += 1
|
| 512 |
-
elif char == '}':
|
| 513 |
-
brace_count -= 1
|
| 514 |
-
if brace_count == 0:
|
| 515 |
-
return text[start_idx:i+1]
|
| 516 |
-
|
| 517 |
-
return ""
|
| 518 |
-
|
| 519 |
tool_name = None
|
| 520 |
tool_input = None
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
# =
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
|
| 531 |
if func_match:
|
| 532 |
try:
|
| 533 |
-
tool_name = func_match.group(1).strip()
|
| 534 |
-
|
| 535 |
-
# Extract JSON starting after the function tag
|
| 536 |
-
json_start = func_match.end()
|
| 537 |
-
remaining_content = content[json_start:]
|
| 538 |
-
json_str = extract_json_with_balanced_braces(remaining_content)
|
| 539 |
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 543 |
else:
|
| 544 |
-
print(f"⚠️ Fallback (Format 1): Found <function> but no
|
| 545 |
tool_name = None
|
| 546 |
|
| 547 |
except json.JSONDecodeError as e:
|
| 548 |
-
print(f"⚠️ Fallback (Format 1):
|
| 549 |
-
|
| 550 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
|
| 552 |
# ========================================================================
|
| 553 |
# STRATEGY 2: Try to parse bare JSON (if Strategy 1 failed)
|
|
@@ -672,32 +648,52 @@ defined_tools = [
|
|
| 672 |
]
|
| 673 |
|
| 674 |
|
| 675 |
-
# ---
|
| 676 |
class AgentState(TypedDict):
|
| 677 |
messages: Annotated[List[AnyMessage], add_messages]
|
| 678 |
-
plan: List[str] # A list of steps to execute
|
| 679 |
turn: int
|
| 680 |
|
| 681 |
|
| 682 |
-
# --- Conditional Edge Function ---
|
| 683 |
-
def
|
| 684 |
"""
|
| 685 |
-
|
| 686 |
"""
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 693 |
return END
|
| 694 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
|
| 696 |
# ====================================================
|
| 697 |
-
# --- Basic Agent Class ---
|
| 698 |
class BasicAgent:
|
| 699 |
def __init__(self):
|
| 700 |
-
print("BasicAgent (
|
| 701 |
|
| 702 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 703 |
if not GROQ_API_KEY:
|
|
@@ -725,7 +721,6 @@ class BasicAgent:
|
|
| 725 |
# Build tool descriptions
|
| 726 |
tool_desc_list = []
|
| 727 |
for tool in self.tools:
|
| 728 |
-
# Use Pydantic schema if available for richer descriptions
|
| 729 |
if tool.args_schema:
|
| 730 |
schema = tool.args_schema.model_json_schema()
|
| 731 |
args_desc = []
|
|
@@ -738,20 +733,19 @@ class BasicAgent:
|
|
| 738 |
desc = f"- {tool.name}: {tool.description}"
|
| 739 |
tool_desc_list.append(desc)
|
| 740 |
tool_descriptions = "\n".join(tool_desc_list)
|
| 741 |
-
|
| 742 |
-
tool_names_str = ", ".join(tool_names_list)
|
| 743 |
-
|
| 744 |
# ==================== SYSTEM PROMPT V7 (Simplified) ====================
|
|
|
|
| 745 |
self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
|
| 746 |
Your goal: Provide the EXACT answer in the EXACT format requested.
|
| 747 |
|
| 748 |
**PROTOCOL:**
|
| 749 |
|
| 750 |
-
1. **ANALYZE:** Read the question
|
| 751 |
-
2. **ACT:** Call ONE tool to get information.
|
| 752 |
3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
|
| 753 |
-
- **If NO:** Go back to Step
|
| 754 |
-
- **If YES:** Call final_answer_tool immediately.
|
| 755 |
|
| 756 |
**CRITICAL RULES:**
|
| 757 |
|
|
@@ -766,319 +760,165 @@ Your goal: Provide the EXACT answer in the EXACT format requested.
|
|
| 766 |
**EXAMPLE: FINAL ANSWER**
|
| 767 |
{{ "name": "final_answer_tool", "arguments": {{"answer": "28"}} }}
|
| 768 |
|
| 769 |
-
**
|
| 770 |
-
{{ "name": "scrape_and_retrieve", "arguments": {{"url": "https://example.com", "query": "what is X?"}} }}
|
| 771 |
-
|
| 772 |
-
**CRITICAL TOOL CALLING FORMAT:**
|
| 773 |
-
When calling tools, you MUST use this EXACT JSON format:
|
| 774 |
-
{{"name": "tool_name", "arguments": {{"param": "value"}}}}
|
| 775 |
-
|
| 776 |
-
NEVER use XML format like <function(...)>.
|
| 777 |
-
NEVER include tool name in arguments.
|
| 778 |
-
|
| 779 |
-
**AVAILABLE TOOLS:**
|
| 780 |
{tool_descriptions}
|
| 781 |
|
| 782 |
-
|
| 783 |
-
**REMEMBER:** Use tools. Format JSON correctly.
|
| 784 |
"""
|
| 785 |
|
| 786 |
-
print("Initializing Groq
|
| 787 |
try:
|
| 788 |
-
#
|
| 789 |
-
self.
|
| 790 |
temperature=0,
|
| 791 |
groq_api_key=GROQ_API_KEY,
|
| 792 |
-
model_name="
|
| 793 |
max_tokens=4096,
|
| 794 |
-
timeout=60
|
| 795 |
-
#model_kwargs={"response_format": {"type": "json_object"}} # Force JSON
|
| 796 |
).bind_tools(self.tools)
|
| 797 |
-
print("✅
|
| 798 |
|
| 799 |
-
# LLM 2: The Planner (no tools, just reasoning)
|
| 800 |
-
self.planner_llm = ChatGroq(
|
| 801 |
-
temperature=0,
|
| 802 |
-
groq_api_key=GROQ_API_KEY,
|
| 803 |
-
model_name="openai/gpt-oss-120b",
|
| 804 |
-
max_tokens=4096,
|
| 805 |
-
timeout=60
|
| 806 |
-
).bind(tool_choice="none")
|
| 807 |
-
print("✅ Planner LLM (no tools) initialized.")
|
| 808 |
except Exception as e:
|
| 809 |
print(f"❌ Error initializing Groq: {e}")
|
| 810 |
raise
|
| 811 |
-
|
| 812 |
-
# --- Define Planner Prompt ---
|
| 813 |
-
self.planner_prompt = """You are a planning assistant. Your ONLY job is to output a Python list.
|
| 814 |
-
|
| 815 |
-
AVAILABLE TOOLS:
|
| 816 |
-
{tool_names}
|
| 817 |
-
|
| 818 |
-
Original Question: {{original_question}}
|
| 819 |
-
|
| 820 |
-
Recent History:
|
| 821 |
-
{{history}}
|
| 822 |
-
|
| 823 |
-
INSTRUCTIONS:
|
| 824 |
-
1. Check if the task is complete (look for final_answer_tool in history)
|
| 825 |
-
- If YES: Output []
|
| 826 |
-
- If NO: Create 1-2 next steps
|
| 827 |
-
|
| 828 |
-
2. Each step MUST use one of these EXACT tool names:
|
| 829 |
-
- search_tool (for web searches)
|
| 830 |
-
- code_interpreter (for calculations, data processing)
|
| 831 |
-
- scrape_and_retrieve (for specific webpage content)
|
| 832 |
-
- read_file (to read uploaded files)
|
| 833 |
-
- final_answer_tool (when you have the final answer)
|
| 834 |
-
|
| 835 |
-
3. Format: "Use [exact_tool_name] to [specific action]"
|
| 836 |
-
|
| 837 |
-
EXAMPLES:
|
| 838 |
-
["Use search_tool to find information about porterhouse steak"]
|
| 839 |
-
["Use code_interpreter to calculate 15 factorial"]
|
| 840 |
-
["Use scrape_and_retrieve to extract recipe from Reddit"]
|
| 841 |
-
["Use final_answer_tool to submit the answer"]
|
| 842 |
-
[]
|
| 843 |
-
|
| 844 |
-
CRITICAL: Use ONLY the tools listed above. Output ONLY the list.
|
| 845 |
-
|
| 846 |
-
Your response:"""
|
| 847 |
-
|
| 848 |
-
# Store tool names in the prompt
|
| 849 |
-
self.planner_prompt = self.planner_prompt.format(
|
| 850 |
-
tool_names=tool_names_str)
|
| 851 |
|
| 852 |
-
# --- Node 1: The
|
| 853 |
-
def
|
| 854 |
current_turn = state.get('turn', 0) + 1
|
| 855 |
print(f"\n{'='*60}")
|
| 856 |
-
print(f"
|
| 857 |
print('='*60)
|
| 858 |
-
|
| 859 |
-
if current_turn > MAX_TURNS:
|
| 860 |
-
print("--- Max turns reached. Ending. ---")
|
| 861 |
-
return {"plan": [], "turn": current_turn}
|
| 862 |
-
|
| 863 |
-
# DON'T pass along existing plan - always replan!
|
| 864 |
-
|
| 865 |
-
# Get last 10 messages for context
|
| 866 |
-
recent_messages = state['messages'][-10:]
|
| 867 |
-
history_str = "\n".join([
|
| 868 |
-
f"{msg.__class__.__name__}: {str(msg.content)[:200]}..."
|
| 869 |
-
for msg in recent_messages
|
| 870 |
-
])
|
| 871 |
-
|
| 872 |
-
# Extract original question
|
| 873 |
-
original_question = next(
|
| 874 |
-
(msg.content for msg in state['messages'] if isinstance(msg, HumanMessage)),
|
| 875 |
-
"Unknown question"
|
| 876 |
-
)
|
| 877 |
-
|
| 878 |
-
# Check if final_answer_tool was called
|
| 879 |
-
for msg in reversed(state['messages']):
|
| 880 |
-
if isinstance(msg, AIMessage) and msg.tool_calls:
|
| 881 |
-
if any(tc.get('name') == 'final_answer_tool' for tc in msg.tool_calls):
|
| 882 |
-
print("✅ Final answer detected. Ending.")
|
| 883 |
-
return {"plan": [], "turn": current_turn}
|
| 884 |
-
|
| 885 |
-
# Format prompt
|
| 886 |
-
prompt = self.planner_prompt.format(
|
| 887 |
-
original_question=original_question,
|
| 888 |
-
history=history_str
|
| 889 |
-
)
|
| 890 |
-
|
| 891 |
-
# Call planner LLM
|
| 892 |
-
try:
|
| 893 |
-
response = self.planner_llm.invoke(prompt)
|
| 894 |
-
plan_str = response.content
|
| 895 |
-
print(f"Raw planner output: {plan_str[:300]}...")
|
| 896 |
-
except Exception as e:
|
| 897 |
-
print(f"⚠️ Planner LLM failed: {e}")
|
| 898 |
-
return {"plan": [], "turn": current_turn}
|
| 899 |
-
|
| 900 |
-
# Parse plan with multiple strategies
|
| 901 |
-
plan_list = []
|
| 902 |
-
|
| 903 |
-
# Strategy 1: Try to find a list in the output
|
| 904 |
-
match = re.search(r'\[([^\]]*)\]', plan_str, re.DOTALL)
|
| 905 |
-
if match:
|
| 906 |
-
try:
|
| 907 |
-
list_str = '[' + match.group(1) + ']'
|
| 908 |
-
# Clean up common issues
|
| 909 |
-
list_str = list_str.replace('\n', ' ')
|
| 910 |
-
list_str = re.sub(r'\s+', ' ', list_str) # Normalize whitespace
|
| 911 |
-
|
| 912 |
-
parsed = json.loads(list_str)
|
| 913 |
-
if isinstance(parsed, list) and all(isinstance(x, str) for x in parsed):
|
| 914 |
-
plan_list = parsed
|
| 915 |
-
print(f"✅ Parsed plan: {plan_list}")
|
| 916 |
-
except json.JSONDecodeError:
|
| 917 |
-
print(f"⚠️ Failed to parse as JSON")
|
| 918 |
-
|
| 919 |
-
# Strategy 2: Look for quoted strings if JSON parsing failed
|
| 920 |
-
if not plan_list:
|
| 921 |
-
quoted_strings = re.findall(r'"([^"]+)"', plan_str)
|
| 922 |
-
if quoted_strings and len(quoted_strings) <= 5:
|
| 923 |
-
# Check if they look like tool steps
|
| 924 |
-
valid_steps = []
|
| 925 |
-
for s in quoted_strings:
|
| 926 |
-
if any(tool.name in s.lower() for tool in self.tools):
|
| 927 |
-
valid_steps.append(s)
|
| 928 |
-
if valid_steps:
|
| 929 |
-
plan_list = valid_steps
|
| 930 |
-
print(f"✅ Extracted steps from quotes: {plan_list}")
|
| 931 |
-
|
| 932 |
-
# Validate plan
|
| 933 |
-
if plan_list:
|
| 934 |
-
# Remove any non-descriptive or invalid steps
|
| 935 |
-
validated_plan = []
|
| 936 |
-
for step in plan_list:
|
| 937 |
-
step_lower = step.lower().strip()
|
| 938 |
-
|
| 939 |
-
# Check if step mentions ANY tool
|
| 940 |
-
mentioned_tool = None
|
| 941 |
-
for tool in self.tools:
|
| 942 |
-
if tool.name.lower() in step_lower:
|
| 943 |
-
mentioned_tool = tool.name
|
| 944 |
-
break
|
| 945 |
-
|
| 946 |
-
if mentioned_tool:
|
| 947 |
-
# Valid step - has a real tool name
|
| 948 |
-
validated_plan.append(step)
|
| 949 |
-
print(f"✅ Accepted step: '{step}' (uses {mentioned_tool})")
|
| 950 |
-
else:
|
| 951 |
-
# Invalid - no real tool mentioned
|
| 952 |
-
print(f"❌ Rejected step: '{step}' (no valid tool name found)")
|
| 953 |
-
|
| 954 |
-
plan_list = validated_plan
|
| 955 |
-
|
| 956 |
-
if not plan_list:
|
| 957 |
-
print("⚠️ No valid plan generated. Ending.")
|
| 958 |
|
| 959 |
-
|
| 960 |
-
|
|
|
|
| 961 |
|
| 962 |
-
# --- Node 2: The Executor ---
|
| 963 |
-
def executor_node(state: AgentState):
|
| 964 |
-
print(f"\n--- EXECUTOR ---")
|
| 965 |
-
|
| 966 |
-
plan = state.get('plan', [])
|
| 967 |
-
if not plan:
|
| 968 |
-
print("⚠️ No plan to execute!")
|
| 969 |
-
return {"messages": [], "plan": []}
|
| 970 |
-
|
| 971 |
-
current_step = plan[0]
|
| 972 |
-
print(f"Executing Step: {current_step}")
|
| 973 |
-
|
| 974 |
-
# Build executor message
|
| 975 |
-
executor_messages = state['messages'] + [
|
| 976 |
-
HumanMessage(content=f"""Execute: {current_step}
|
| 977 |
-
|
| 978 |
-
Available tools: search_tool, code_interpreter, scrape_and_retrieve, final_answer_tool
|
| 979 |
-
|
| 980 |
-
Call ONE tool in JSON format: {{"name": "tool_name", "arguments": {{...}}}}""")
|
| 981 |
-
]
|
| 982 |
-
|
| 983 |
-
# Try to call LLM
|
| 984 |
max_retries = 3
|
| 985 |
ai_message = None
|
| 986 |
for attempt in range(max_retries):
|
| 987 |
try:
|
| 988 |
-
|
|
|
|
| 989 |
break
|
| 990 |
except Exception as e:
|
| 991 |
-
print(f"⚠️
|
| 992 |
if attempt == max_retries - 1:
|
| 993 |
-
ai_message = AIMessage(
|
|
|
|
|
|
|
| 994 |
time.sleep(2 ** attempt)
|
| 995 |
|
| 996 |
-
# Fallback
|
| 997 |
if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
|
| 998 |
parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
|
| 999 |
if parsed_tool_calls:
|
| 1000 |
-
print("🔧 Fallback SUCCESS:
|
| 1001 |
ai_message.tool_calls = parsed_tool_calls
|
| 1002 |
-
ai_message.content = ""
|
| 1003 |
else:
|
| 1004 |
-
print(f"⚠️ Fallback FAILED")
|
| 1005 |
|
| 1006 |
if ai_message.tool_calls:
|
| 1007 |
-
|
| 1008 |
else:
|
| 1009 |
-
print("
|
| 1010 |
|
| 1011 |
-
|
| 1012 |
-
return {"messages": [ai_message], "plan": []}
|
| 1013 |
|
| 1014 |
# --- Tool Node ---
|
| 1015 |
tool_node = ToolNode(self.tools)
|
| 1016 |
|
| 1017 |
# --- Build Graph ---
|
| 1018 |
-
print("Building
|
| 1019 |
graph_builder = StateGraph(AgentState)
|
| 1020 |
|
| 1021 |
-
graph_builder.add_node("
|
| 1022 |
-
graph_builder.add_node("executor", executor_node)
|
| 1023 |
graph_builder.add_node("tools", tool_node)
|
| 1024 |
|
| 1025 |
-
graph_builder.add_edge(START, "
|
| 1026 |
|
| 1027 |
graph_builder.add_conditional_edges(
|
| 1028 |
-
"
|
| 1029 |
-
|
| 1030 |
{
|
| 1031 |
-
"
|
|
|
|
| 1032 |
END: END
|
| 1033 |
}
|
| 1034 |
)
|
| 1035 |
|
| 1036 |
-
graph_builder.add_edge("
|
| 1037 |
-
graph_builder.add_edge("tools", "planner") # Loop back to planner
|
| 1038 |
|
| 1039 |
self.graph = graph_builder.compile()
|
| 1040 |
-
print("✅
|
| 1041 |
|
| 1042 |
def __call__(self, question: str) -> str:
|
| 1043 |
print(f"\n--- Starting Agent Run for Question ---")
|
| 1044 |
-
print(f"
|
| 1045 |
|
|
|
|
| 1046 |
graph_input = {
|
| 1047 |
"messages": [
|
| 1048 |
SystemMessage(content=self.system_prompt),
|
| 1049 |
HumanMessage(content=question)
|
| 1050 |
],
|
| 1051 |
-
"plan": [],
|
| 1052 |
"turn": 0
|
| 1053 |
}
|
| 1054 |
|
| 1055 |
final_answer = "AGENT FAILED TO PRODUCE ANSWER"
|
| 1056 |
try:
|
| 1057 |
-
config = {"recursion_limit":
|
| 1058 |
-
|
| 1059 |
for event in self.graph.stream(graph_input, stream_mode="values", config=config):
|
| 1060 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1061 |
|
| 1062 |
-
# Check for final answer
|
| 1063 |
if isinstance(last_message, AIMessage) and last_message.tool_calls:
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1074 |
cleaned_answer = str(final_answer).strip()
|
| 1075 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1076 |
return cleaned_answer
|
| 1077 |
-
|
| 1078 |
except Exception as e:
|
| 1079 |
-
print(f"
|
| 1080 |
-
traceback.
|
| 1081 |
-
|
|
|
|
| 1082 |
|
| 1083 |
|
| 1084 |
# ====================================================
|
|
@@ -1095,7 +935,7 @@ except Exception as e:
|
|
| 1095 |
|
| 1096 |
# ====================================================
|
| 1097 |
# --- (Original Template Code - Mock Questions Version) ---
|
| 1098 |
-
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 1099 |
"""
|
| 1100 |
Fetches MOCK questions, runs the BasicAgent on them, simulates submission prep,
|
| 1101 |
and displays the results. DOES NOT SUBMIT.
|
|
@@ -1109,7 +949,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 1109 |
return "FATAL ERROR: Global agent failed to initialize. Check logs.", None
|
| 1110 |
|
| 1111 |
print("Using globally instantiated agent.")
|
| 1112 |
-
agent_code = f"
|
| 1113 |
print(f"Agent code URL: {agent_code}")
|
| 1114 |
print("--- USING MOCK QUESTIONS ---")
|
| 1115 |
|
|
@@ -1198,9 +1038,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 1198 |
"task_id": "mock_level1_020",
|
| 1199 |
"question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
|
| 1200 |
}
|
| 1201 |
-
#
|
| 1202 |
-
# ^^^ PASTE YOUR FULL LIST OF 20 MOCK QUESTIONS HERE ^^^
|
| 1203 |
-
#
|
| 1204 |
]
|
| 1205 |
|
| 1206 |
questions_data = mock_questions_data
|
|
@@ -1237,7 +1074,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 1237 |
|
| 1238 |
status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
|
| 1239 |
print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
|
| 1240 |
-
final_status = "---
|
| 1241 |
results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
|
| 1242 |
return final_status, results_df
|
| 1243 |
|
|
@@ -1247,7 +1084,7 @@ with gr.Blocks() as demo:
|
|
| 1247 |
gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
|
| 1248 |
gr.Markdown("""
|
| 1249 |
**Instructions:** Click 'Run Mock Evaluation'.
|
| 1250 |
-
**Notes:** Uses Groq (Llama
|
| 1251 |
""")
|
| 1252 |
gr.LoginButton()
|
| 1253 |
run_button = gr.Button("Run Mock Evaluation")
|
|
@@ -1257,7 +1094,7 @@ with gr.Blocks() as demo:
|
|
| 1257 |
|
| 1258 |
if __name__ == "__main__":
|
| 1259 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 1260 |
-
space_host_startup = os.getenv("SPACE_ID"); space_id_startup = os.getenv("SPACE_ID")
|
| 1261 |
if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
|
| 1262 |
else: print("ℹ️ No SPACE_HOST (local?).")
|
| 1263 |
if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
@@ -1272,3 +1109,4 @@ if __name__ == "__main__":
|
|
| 1272 |
print("Launching Gradio Interface...")
|
| 1273 |
demo.queue().launch(debug=True, share=False)
|
| 1274 |
|
|
|
|
|
|
| 12 |
import re
|
| 13 |
import uuid
|
| 14 |
import time
|
| 15 |
+
import ast # <-- Import ast module
|
| 16 |
|
| 17 |
# --- Pydantic Import ---
|
| 18 |
from pydantic import BaseModel, Field
|
|
|
|
| 63 |
agent = None
|
| 64 |
|
| 65 |
# ====================================================
|
| 66 |
+
# --- Tool Definitions (Unchanged) ---
|
| 67 |
|
| 68 |
class SearchInput(BaseModel):
|
| 69 |
query: str = Field(description="The search query.")
|
|
|
|
| 465 |
return text
|
| 466 |
return original_text
|
| 467 |
|
| 468 |
+
|
| 469 |
+
# --- *** ROBUST FALLBACK PARSER *** ---
|
| 470 |
def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
|
| 471 |
"""
|
| 472 |
Parses malformed tool call strings (dribbled) from an LLM response.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
"""
|
| 474 |
+
print(f"Original LLM content for fallback parsing:\n---\n{content}\n---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
tool_name = None
|
| 476 |
tool_input = None
|
| 477 |
+
cleaned_str = None # For storing cleaned string before parsing
|
| 478 |
+
|
| 479 |
+
# STRATEGY 1: Try to parse <function(tool_name)>...{json_string}...
|
| 480 |
+
# This also handles <function=tool_name>...{json_string}...
|
| 481 |
+
func_match = re.search(
|
| 482 |
+
r"<function[(=]\s*([^)]+)\s*[)>](.*)", # <-- More robust regex
|
| 483 |
+
content,
|
| 484 |
+
re.DOTALL | re.IGNORECASE
|
| 485 |
+
)
|
| 486 |
|
| 487 |
if func_match:
|
| 488 |
try:
|
| 489 |
+
tool_name = func_match.group(1).strip().replace("'", "").replace('"', '') # Clean tool name
|
| 490 |
+
remaining_content = func_match.group(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
|
| 492 |
+
json_start_index = remaining_content.find('{')
|
| 493 |
+
if json_start_index != -1:
|
| 494 |
+
json_str = remaining_content[json_start_index:]
|
| 495 |
+
# --- Aggressive Cleaning ---
|
| 496 |
+
cleaned_str = json_str.strip()
|
| 497 |
+
cleaned_str = ''.join(c for c in cleaned_str if c.isprintable() or c in '\n\r\t')
|
| 498 |
+
cleaned_str = cleaned_str.strip().rstrip(',')
|
| 499 |
+
|
| 500 |
+
tool_input = json.loads(cleaned_str)
|
| 501 |
+
print(f"🔧 Fallback (Format 1 - json.loads): Parsed tool call for '{tool_name}'")
|
| 502 |
else:
|
| 503 |
+
print(f"⚠️ Fallback (Format 1): Found <function> but no JSON blob.")
|
| 504 |
tool_name = None
|
| 505 |
|
| 506 |
except json.JSONDecodeError as e:
|
| 507 |
+
print(f"⚠️ Fallback (Format 1): json.loads failed after cleaning: {e}. Trying ast.literal_eval.")
|
| 508 |
+
try:
|
| 509 |
+
# Secondary attempt with ast.literal_eval
|
| 510 |
+
if cleaned_str:
|
| 511 |
+
potential_input = ast.literal_eval(cleaned_str)
|
| 512 |
+
if isinstance(potential_input, dict):
|
| 513 |
+
tool_input = potential_input
|
| 514 |
+
print(f"🔧 Fallback (Format 1 - ast.literal_eval): Parsed tool call for '{tool_name}'")
|
| 515 |
+
else:
|
| 516 |
+
print(f"⚠️ Fallback (Format 1): ast.literal_eval did not produce a dict.")
|
| 517 |
+
tool_name = None
|
| 518 |
+
else:
|
| 519 |
+
tool_name = None
|
| 520 |
+
|
| 521 |
+
except (SyntaxError, ValueError) as ast_e:
|
| 522 |
+
print(f"⚠️ Fallback (Format 1): ast.literal_eval also failed: {ast_e}")
|
| 523 |
+
tool_name = None
|
| 524 |
+
except Exception as e_inner:
|
| 525 |
+
print(f"⚠️ Fallback (Format 1): Unexpected error during ast.literal_eval: {e_inner}")
|
| 526 |
+
tool_name = None
|
| 527 |
|
| 528 |
# ========================================================================
|
| 529 |
# STRATEGY 2: Try to parse bare JSON (if Strategy 1 failed)
|
|
|
|
| 648 |
]
|
| 649 |
|
| 650 |
|
| 651 |
+
# --- *** NEW: Reverted AgentState *** ---
|
| 652 |
class AgentState(TypedDict):
|
| 653 |
messages: Annotated[List[AnyMessage], add_messages]
|
|
|
|
| 654 |
turn: int
|
| 655 |
|
| 656 |
|
| 657 |
+
# --- *** NEW: Reverted Conditional Edge Function *** ---
|
| 658 |
+
def should_continue(state: AgentState):
|
| 659 |
"""
|
| 660 |
+
Decide whether to continue, call tools, or end.
|
| 661 |
"""
|
| 662 |
+
last_message = state['messages'][-1]
|
| 663 |
+
current_turn = state.get('turn', 0)
|
| 664 |
+
|
| 665 |
+
# 1. Check for final_answer_tool
|
| 666 |
+
if isinstance(last_message, AIMessage) and last_message.tool_calls:
|
| 667 |
+
for tool_call in last_message.tool_calls:
|
| 668 |
+
if tool_call.get("name") == "final_answer_tool":
|
| 669 |
+
print("--- Condition: final_answer_tool called, ending. ---")
|
| 670 |
+
return END
|
| 671 |
+
|
| 672 |
+
# 2. Check turn limit
|
| 673 |
+
if current_turn >= MAX_TURNS:
|
| 674 |
+
print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
|
| 675 |
return END
|
| 676 |
|
| 677 |
+
# 3. Route to tools if tool calls exist
|
| 678 |
+
if isinstance(last_message, AIMessage) and last_message.tool_calls:
|
| 679 |
+
print("--- Condition: Tools called, routing to tools node. ---")
|
| 680 |
+
return "tools"
|
| 681 |
+
|
| 682 |
+
# 4. Loop prevention
|
| 683 |
+
if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
|
| 684 |
+
print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
|
| 685 |
+
return END
|
| 686 |
+
|
| 687 |
+
# 5. Loop back to agent (reasoning/planning step)
|
| 688 |
+
print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
|
| 689 |
+
return "agent"
|
| 690 |
+
|
| 691 |
|
| 692 |
# ====================================================
|
| 693 |
+
# --- *** NEW: Reverted Basic Agent Class *** ---
|
| 694 |
class BasicAgent:
|
| 695 |
def __init__(self):
|
| 696 |
+
print("BasicAgent (Single LLM) initializing...")
|
| 697 |
|
| 698 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 699 |
if not GROQ_API_KEY:
|
|
|
|
| 721 |
# Build tool descriptions
|
| 722 |
tool_desc_list = []
|
| 723 |
for tool in self.tools:
|
|
|
|
| 724 |
if tool.args_schema:
|
| 725 |
schema = tool.args_schema.model_json_schema()
|
| 726 |
args_desc = []
|
|
|
|
| 733 |
desc = f"- {tool.name}: {tool.description}"
|
| 734 |
tool_desc_list.append(desc)
|
| 735 |
tool_descriptions = "\n".join(tool_desc_list)
|
| 736 |
+
|
|
|
|
|
|
|
| 737 |
# ==================== SYSTEM PROMPT V7 (Simplified) ====================
|
| 738 |
+
# This prompt is for a single, powerful agent
|
| 739 |
self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
|
| 740 |
Your goal: Provide the EXACT answer in the EXACT format requested.
|
| 741 |
|
| 742 |
**PROTOCOL:**
|
| 743 |
|
| 744 |
+
1. **ANALYZE:** Read the question and history. What is the next logical step?
|
| 745 |
+
2. **ACT:** Call ONE tool to get information or perform a calculation.
|
| 746 |
3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
|
| 747 |
+
- **If NO:** Go back to Step 1 and decide the *next* step.
|
| 748 |
+
- **If YES:** Call final_answer_tool immediately with the answer.
|
| 749 |
|
| 750 |
**CRITICAL RULES:**
|
| 751 |
|
|
|
|
| 760 |
**EXAMPLE: FINAL ANSWER**
|
| 761 |
{{ "name": "final_answer_tool", "arguments": {{"answer": "28"}} }}
|
| 762 |
|
| 763 |
+
**TOOLS:**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 764 |
{tool_descriptions}
|
| 765 |
|
| 766 |
+
**REMEMBER:** One step at a time. Use tools. Format JSON correctly.
|
|
|
|
| 767 |
"""
|
| 768 |
|
| 769 |
+
print("Initializing Groq LLM...")
|
| 770 |
try:
|
| 771 |
+
# --- Initialize ONE Powerful LLM for all tasks ---
|
| 772 |
+
self.llm_with_tools = ChatGroq(
|
| 773 |
temperature=0,
|
| 774 |
groq_api_key=GROQ_API_KEY,
|
| 775 |
+
model_name="llama-3.3-70b-versatile", # <-- Use the powerful model
|
| 776 |
max_tokens=4096,
|
| 777 |
+
timeout=60
|
|
|
|
| 778 |
).bind_tools(self.tools)
|
| 779 |
+
print("✅ Main LLM (llama-3.3-70b-versatile with tools) initialized.")
|
| 780 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 781 |
except Exception as e:
|
| 782 |
print(f"❌ Error initializing Groq: {e}")
|
| 783 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 784 |
|
| 785 |
+
# --- Node 1: The Agent ---
|
| 786 |
+
def agent_node(state: AgentState):
|
| 787 |
current_turn = state.get('turn', 0) + 1
|
| 788 |
print(f"\n{'='*60}")
|
| 789 |
+
print(f"AGENT TURN {current_turn}/{MAX_TURNS}")
|
| 790 |
print('='*60)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 791 |
|
| 792 |
+
# Note: Max turns is also checked in should_continue, but good to have here
|
| 793 |
+
if current_turn > MAX_TURNS:
|
| 794 |
+
return {"messages": [SystemMessage(content="Max turns reached.")]}
|
| 795 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
max_retries = 3
|
| 797 |
ai_message = None
|
| 798 |
for attempt in range(max_retries):
|
| 799 |
try:
|
| 800 |
+
# Call the single, powerful LLM
|
| 801 |
+
ai_message = self.llm_with_tools.invoke(state["messages"])
|
| 802 |
break
|
| 803 |
except Exception as e:
|
| 804 |
+
print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {e}")
|
| 805 |
if attempt == max_retries - 1:
|
| 806 |
+
ai_message = AIMessage(
|
| 807 |
+
content=f"Error: LLM failed after {max_retries} attempts: {e}"
|
| 808 |
+
)
|
| 809 |
time.sleep(2 ** attempt)
|
| 810 |
|
| 811 |
+
# --- Fallback Parsing Logic ---
|
| 812 |
if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
|
| 813 |
parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
|
| 814 |
if parsed_tool_calls:
|
| 815 |
+
print("🔧 Fallback SUCCESS: Rebuilding tool call(s).")
|
| 816 |
ai_message.tool_calls = parsed_tool_calls
|
| 817 |
+
ai_message.content = "" # Clear the text content
|
| 818 |
else:
|
| 819 |
+
print(f"⚠️ Fallback FAILED: Could not parse any tool call from content:\n{ai_message.content[:200]}...")
|
| 820 |
|
| 821 |
if ai_message.tool_calls:
|
| 822 |
+
print(f"🔧 Agent Tool Call: {ai_message.tool_calls[0]['name']}")
|
| 823 |
else:
|
| 824 |
+
print(f"💭 Agent Reasoning: {ai_message.content[:200]}...")
|
| 825 |
|
| 826 |
+
return {"messages": [ai_message], "turn": current_turn}
|
|
|
|
| 827 |
|
| 828 |
# --- Tool Node ---
|
| 829 |
tool_node = ToolNode(self.tools)
|
| 830 |
|
| 831 |
# --- Build Graph ---
|
| 832 |
+
print("Building Single-Agent graph...")
|
| 833 |
graph_builder = StateGraph(AgentState)
|
| 834 |
|
| 835 |
+
graph_builder.add_node("agent", agent_node)
|
|
|
|
| 836 |
graph_builder.add_node("tools", tool_node)
|
| 837 |
|
| 838 |
+
graph_builder.add_edge(START, "agent")
|
| 839 |
|
| 840 |
graph_builder.add_conditional_edges(
|
| 841 |
+
"agent",
|
| 842 |
+
should_continue, # Use the reverted conditional function
|
| 843 |
{
|
| 844 |
+
"tools": "tools",
|
| 845 |
+
"agent": "agent", # For loop prevention
|
| 846 |
END: END
|
| 847 |
}
|
| 848 |
)
|
| 849 |
|
| 850 |
+
graph_builder.add_edge("tools", "agent") # Loop back to agent
|
|
|
|
| 851 |
|
| 852 |
self.graph = graph_builder.compile()
|
| 853 |
+
print("✅ Single-Agent graph compiled successfully.")
|
| 854 |
|
| 855 |
def __call__(self, question: str) -> str:
    """Run the compiled agent graph on a single question and return the cleaned answer.

    Streams graph events, captures the answer from the first
    ``final_answer_tool`` call, then strips common answer prefixes and
    stray code fences/backticks before returning.

    Args:
        question: The task/question text to answer.

    Returns:
        The cleaned final-answer string, or an error string if the graph
        raised or never produced a ``final_answer_tool`` call.
    """
    print("\n--- Starting Agent Run for Question ---")
    print(f"Agent received question (first 100 chars): {question[:100]}...")

    # --- Initialize Reverted AgentState (no plan) ---
    graph_input = {
        "messages": [
            SystemMessage(content=self.system_prompt),
            HumanMessage(content=question)
        ],
        "turn": 0
    }

    final_answer = "AGENT FAILED TO PRODUCE ANSWER"
    try:
        # Allow a few extra graph hops beyond MAX_TURNS for tool round-trips.
        config = {"recursion_limit": MAX_TURNS + 5}
        for event in self.graph.stream(graph_input, stream_mode="values", config=config):
            if event.get('messages'):  # Ensure messages exist
                last_message = event["messages"][-1]
            else:
                continue  # Skip if no messages yet

            # Check for final answer extraction
            if isinstance(last_message, AIMessage) and last_message.tool_calls:
                if last_message.tool_calls[0].get("name") == "final_answer_tool":
                    final_answer_args = last_message.tool_calls[0].get('args', {})
                    if 'answer' in final_answer_args:
                        final_answer = final_answer_args['answer']
                        print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
                        break
                    else:
                        print(f"⚠️ Final Answer tool called without 'answer' argument: {final_answer_args}")
                        final_answer = "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER"
                        break
            elif isinstance(last_message, ToolMessage):
                print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
            elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
                print(f"AI Message (Reasoning): {last_message.content[:500]}...")
            elif isinstance(last_message, SystemMessage):
                print(f"System Message: {last_message.content[:500]}...")

        # --- Final Answer Cleaning ---
        cleaned_answer = str(final_answer).strip()
        prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
        for prefix in prefixes_to_remove:
            if cleaned_answer.lower().startswith(prefix.lower()):
                potential_answer = cleaned_answer[len(prefix):].strip()
                if potential_answer:  # Never strip a prefix down to an empty answer
                    cleaned_answer = potential_answer
                    break  # Remove at most one prefix

        cleaned_answer = remove_fences_simple(cleaned_answer)
        # Strip a single pair of surrounding backticks.  The length guard
        # prevents the lone string "`" (startswith and endswith both true)
        # from collapsing to "" via [1:-1].
        if len(cleaned_answer) >= 2 and cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
            cleaned_answer = cleaned_answer[1:-1].strip()

        print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
        return cleaned_answer

    except Exception as e:
        print(f"Error running agent graph: {e}")
        tb_str = traceback.format_exc()
        print(tb_str)
        return f"AGENT GRAPH ERROR: {e}"
|
| 922 |
|
| 923 |
|
| 924 |
# ====================================================
|
|
|
|
| 935 |
|
| 936 |
# ====================================================
|
| 937 |
# --- (Original Template Code - Mock Questions Version) ---
|
| 938 |
+
def run_and_submit_all( profile: gr.OAuthProfile | None): # Corrected type hint
|
| 939 |
"""
|
| 940 |
Fetches MOCK questions, runs the BasicAgent on them, simulates submission prep,
|
| 941 |
and displays the results. DOES NOT SUBMIT.
|
|
|
|
| 949 |
return "FATAL ERROR: Global agent failed to initialize. Check logs.", None
|
| 950 |
|
| 951 |
print("Using globally instantiated agent.")
# BUG FIX: URL scheme was malformed ("httpsS://") — use "https://".
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
print(f"Agent code URL: {agent_code}")
print("--- USING MOCK QUESTIONS ---")
|
| 955 |
|
|
|
|
| 1038 |
"task_id": "mock_level1_020",
|
| 1039 |
"question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
|
| 1040 |
}
|
|
|
|
|
|
|
|
|
|
| 1041 |
]
|
| 1042 |
|
| 1043 |
questions_data = mock_questions_data
|
|
|
|
| 1074 |
|
| 1075 |
status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
|
| 1076 |
print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
|
| 1077 |
+
final_status = "--- Mock RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED." # Corrected typo
|
| 1078 |
results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
|
| 1079 |
return final_status, results_df
|
| 1080 |
|
|
|
|
| 1084 |
gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
|
| 1085 |
gr.Markdown("""
|
| 1086 |
**Instructions:** Click 'Run Mock Evaluation'.
|
| 1087 |
+
**Notes:** Uses Groq (Llama-3.3-70b Executor). Ensure `GROQ_API_KEY` secret/env var exists. **DOES NOT** fetch official Qs or submit. Check logs for details.
|
| 1088 |
""")
|
| 1089 |
gr.LoginButton()
|
| 1090 |
run_button = gr.Button("Run Mock Evaluation")
|
|
|
|
| 1094 |
|
| 1095 |
if __name__ == "__main__":
|
| 1096 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
# BUG FIX: SPACE_HOST was previously read from the SPACE_ID env var
# (both assignments used "SPACE_ID"); read the correct variable here.
space_host_startup = os.getenv("SPACE_HOST")
space_id_startup = os.getenv("SPACE_ID")
if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
else: print("ℹ️ No SPACE_HOST (local?).")
if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
|
|
| 1109 |
print("Launching Gradio Interface...")
|
| 1110 |
demo.queue().launch(debug=True, share=False)
|
| 1111 |
|
| 1112 |
+
|