Spaces:
Runtime error
Runtime error
Omachoko
committed on
Commit
·
443c7ca
1
Parent(s):
6545c63
Restore original Gradio interface while maintaining enhanced GAIA agent features
Browse files
app.py
CHANGED
|
@@ -769,86 +769,70 @@ class ModularGAIAAgent:
|
|
| 769 |
logger.error(f"Batch processing overall error: {e}")
|
| 770 |
yield "Error in batch processing", []
|
| 771 |
|
| 772 |
-
# ---
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 786 |
|
|
|
|
| 787 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 788 |
-
"""
|
| 789 |
-
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 790 |
-
and displays the results.
|
| 791 |
-
"""
|
| 792 |
space_id = os.getenv("SPACE_ID")
|
| 793 |
if profile:
|
| 794 |
-
username =
|
| 795 |
print(f"User logged in: {username}")
|
| 796 |
else:
|
| 797 |
-
print("User not logged in.")
|
| 798 |
return "Please Login to Hugging Face with the button.", None
|
|
|
|
| 799 |
api_url = DEFAULT_API_URL
|
| 800 |
questions_url = f"{api_url}/questions"
|
| 801 |
submit_url = f"{api_url}/submit"
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
except Exception as e:
|
| 805 |
-
print(f"Error instantiating agent: {e}")
|
| 806 |
-
return f"Error initializing agent: {e}", None
|
| 807 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 808 |
-
|
| 809 |
-
print(f"Fetching questions from: {questions_url}")
|
| 810 |
try:
|
| 811 |
response = requests.get(questions_url, timeout=15)
|
| 812 |
response.raise_for_status()
|
| 813 |
questions_data = response.json()
|
| 814 |
-
if not questions_data:
|
| 815 |
-
print("Fetched questions list is empty.")
|
| 816 |
-
return "Fetched questions list is empty or invalid format.", None
|
| 817 |
-
print(f"Fetched {len(questions_data)} questions.")
|
| 818 |
-
except requests.exceptions.RequestException as e:
|
| 819 |
-
print(f"Error fetching questions: {e}")
|
| 820 |
-
return f"Error fetching questions: {e}", None
|
| 821 |
-
except requests.exceptions.JSONDecodeError as e:
|
| 822 |
-
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 823 |
-
print(f"Response text: {response.text[:500]}")
|
| 824 |
-
return f"Error decoding server response for questions: {e}", None
|
| 825 |
except Exception as e:
|
| 826 |
-
|
| 827 |
-
|
| 828 |
results_log = []
|
| 829 |
answers_payload = []
|
| 830 |
-
|
|
|
|
| 831 |
for item in questions_data:
|
| 832 |
task_id = item.get("task_id")
|
| 833 |
question_text = item.get("question")
|
| 834 |
-
|
| 835 |
-
if not task_id or question_text is None:
|
| 836 |
-
print(f"Skipping item with missing task_id or question: {item}")
|
| 837 |
continue
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
print(f"Error running agent on task {task_id}: {e}")
|
| 844 |
-
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
| 845 |
if not answers_payload:
|
| 846 |
-
print("Agent did not produce any answers to submit.")
|
| 847 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
|
| 848 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 849 |
-
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 850 |
-
print(status_update)
|
| 851 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
|
|
|
| 852 |
try:
|
| 853 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 854 |
response.raise_for_status()
|
|
@@ -858,130 +842,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 858 |
f"User: {result_data.get('username')}\n"
|
| 859 |
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 860 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 861 |
-
f"Message: {result_data.get('message', 'No message received.')}"
|
| 862 |
-
|
| 863 |
results_df = pd.DataFrame(results_log)
|
| 864 |
return final_status, results_df
|
| 865 |
-
except requests.exceptions.HTTPError as e:
|
| 866 |
-
error_detail = f"Server responded with status {e.response.status_code}."
|
| 867 |
-
try:
|
| 868 |
-
error_json = e.response.json()
|
| 869 |
-
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
| 870 |
-
except requests.exceptions.JSONDecodeError:
|
| 871 |
-
error_detail += f" Response: {e.response.text[:500]}"
|
| 872 |
-
status_message = f"Submission Failed: {error_detail}"
|
| 873 |
-
print(status_message)
|
| 874 |
-
results_df = pd.DataFrame(results_log)
|
| 875 |
-
return status_message, results_df
|
| 876 |
-
except requests.exceptions.Timeout:
|
| 877 |
-
status_message = "Submission Failed: The request timed out."
|
| 878 |
-
print(status_message)
|
| 879 |
-
results_df = pd.DataFrame(results_log)
|
| 880 |
-
return status_message, results_df
|
| 881 |
-
except requests.exceptions.RequestException as e:
|
| 882 |
-
status_message = f"Submission Failed: Network error - {e}"
|
| 883 |
-
print(status_message)
|
| 884 |
-
results_df = pd.DataFrame(results_log)
|
| 885 |
-
return status_message, results_df
|
| 886 |
except Exception as e:
|
| 887 |
-
|
| 888 |
-
print(status_message)
|
| 889 |
-
results_df = pd.DataFrame(results_log)
|
| 890 |
-
return status_message, results_df
|
| 891 |
-
|
| 892 |
-
# --- Gradio UI with Enhanced Feedback and Control ---
|
| 893 |
-
with gr.Blocks(title="GAIA Agent - Multi-Tab with Progress Tracking") as app:
|
| 894 |
-
gr.Markdown("# GAIA Agent for Hugging Face AI Agents Course\nTarget: 30%+ on GAIA Benchmark for Certification")
|
| 895 |
-
with gr.Tabs() as tabs:
|
| 896 |
-
# Tab 1: Fetch GAIA Questions with Progress
|
| 897 |
-
with gr.TabItem("Fetch GAIA Questions"):
|
| 898 |
-
with gr.Row():
|
| 899 |
-
token_input = gr.Textbox(label="Hugging Face Token", placeholder="Enter your HF token", type="password")
|
| 900 |
-
fetch_btn = gr.Button("Fetch Questions")
|
| 901 |
-
fetch_progress = gr.Textbox(label="Progress", value="Not started", interactive=False)
|
| 902 |
-
questions_output = gr.JSON(label="Fetched Questions")
|
| 903 |
-
fetch_btn.click(
|
| 904 |
-
fn=lambda token: ("Fetching...", agent.fetch_questions(token)),
|
| 905 |
-
inputs=token_input,
|
| 906 |
-
outputs=[fetch_progress, questions_output]
|
| 907 |
-
)
|
| 908 |
-
# Tab 2: Manual Question Input with Detailed Feedback
|
| 909 |
-
with gr.TabItem("Manual Question Input"):
|
| 910 |
-
question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here")
|
| 911 |
-
with gr.Row():
|
| 912 |
-
file_upload = gr.File(label="Upload File (optional)", file_types=[".jpg", ".png", ".mp3", ".csv", ".xlsx", ".py"])
|
| 913 |
-
context_upload = gr.File(label="Context Files (optional)", file_count="multiple")
|
| 914 |
-
answer_btn = gr.Button("Get Answer")
|
| 915 |
-
with gr.Row():
|
| 916 |
-
answer_output = gr.Textbox(label="Answer", interactive=False)
|
| 917 |
-
reasoning_trace = gr.Textbox(label="Reasoning Trace", interactive=False)
|
| 918 |
-
answer_btn.click(
|
| 919 |
-
fn=lambda q, f, ctx: agent.answer_question_manual(q, f, ctx),
|
| 920 |
-
inputs=[question_input, file_upload, context_upload],
|
| 921 |
-
outputs=[answer_output, reasoning_trace]
|
| 922 |
-
)
|
| 923 |
-
# Tab 3: Submit Answers and View Score with Progress Bar
|
| 924 |
-
with gr.TabItem("Submit & Score"):
|
| 925 |
-
with gr.Row():
|
| 926 |
-
submit_token = gr.Textbox(label="Hugging Face Token", placeholder="Enter your HF token", type="password")
|
| 927 |
-
submit_btn = gr.Button("Run on All & Submit")
|
| 928 |
-
submit_progress = gr.Textbox(label="Submission Progress", value="Not started", interactive=False)
|
| 929 |
-
score_output = gr.Textbox(label="Score", interactive=False)
|
| 930 |
-
with gr.Row():
|
| 931 |
-
progress_bar = gr.Slider(minimum=0, maximum=100, value=0, label="Completion", interactive=False)
|
| 932 |
-
status_text = gr.Textbox(label="Status", value="Idle", interactive=False)
|
| 933 |
-
submit_btn.click(
|
| 934 |
-
fn=lambda token: agent.run_and_submit_all(token),
|
| 935 |
-
inputs=submit_token,
|
| 936 |
-
outputs=[submit_progress, score_output, progress_bar, status_text]
|
| 937 |
-
)
|
| 938 |
-
# Tab 4: Agent Details and Configuration
|
| 939 |
-
with gr.TabItem("Agent Details"):
|
| 940 |
-
gr.Markdown("## Agent Capabilities\n- **Tools**: Web search, image/audio analysis, table QA, YouTube QA, chess analysis, botanical classification\n- **Reasoning**: Thought-Action-Observation cycle with ReAct prompting (up to 5 steps)\n- **API**: Full GAIA API integration for fetching and submitting\n- **Performance**: Optimized with caching and error recovery")
|
| 941 |
-
with gr.Row():
|
| 942 |
-
tool_list = gr.Textbox(label="Available Tools", value=", ".join(TOOL_REGISTRY.keys()), interactive=False)
|
| 943 |
-
config_btn = gr.Button("Refresh Configuration")
|
| 944 |
-
config_output = gr.Textbox(label="Configuration Status", interactive=False)
|
| 945 |
-
config_btn.click(
|
| 946 |
-
fn=lambda: ("Configuration refreshed", ", ".join(TOOL_REGISTRY.keys())),
|
| 947 |
-
inputs=None,
|
| 948 |
-
outputs=[config_output, tool_list]
|
| 949 |
-
)
|
| 950 |
-
# Tab 5: Batch Processing with Progress Tracking
|
| 951 |
-
with gr.TabItem("Batch Processing"):
|
| 952 |
-
batch_token = gr.Textbox(label="Hugging Face Token", placeholder="Enter your HF token", type="password")
|
| 953 |
-
batch_btn = gr.Button("Process Batch of Questions")
|
| 954 |
-
batch_progress = gr.Textbox(label="Batch Progress", value="0/0 questions processed", interactive=False)
|
| 955 |
-
batch_results = gr.JSON(label="Batch Results")
|
| 956 |
-
batch_btn.click(
|
| 957 |
-
fn=lambda token: agent.process_batch(token),
|
| 958 |
-
inputs=batch_token,
|
| 959 |
-
outputs=[batch_progress, batch_results]
|
| 960 |
-
)
|
| 961 |
-
|
| 962 |
-
# Launch app with public link for easy access
|
| 963 |
-
app.launch(share=True)
|
| 964 |
-
|
| 965 |
-
if __name__ == "__main__":
|
| 966 |
-
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 967 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 968 |
-
space_host_startup = os.getenv("SPACE_HOST")
|
| 969 |
-
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
| 970 |
-
|
| 971 |
-
if space_host_startup:
|
| 972 |
-
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
| 973 |
-
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
| 974 |
-
else:
|
| 975 |
-
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 976 |
-
|
| 977 |
-
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
| 978 |
-
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 979 |
-
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 980 |
-
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 981 |
-
else:
|
| 982 |
-
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 983 |
-
|
| 984 |
-
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 985 |
-
|
| 986 |
-
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 987 |
-
app.launch(debug=True, share=False)
|
|
|
|
| 769 |
logger.error(f"Batch processing overall error: {e}")
|
| 770 |
yield "Error in batch processing", []
|
| 771 |
|
| 772 |
+
# --- Build Gradio Interface using Blocks (Maintaining Original Architecture) ---
|
| 773 |
+
with gr.Blocks() as demo:
|
| 774 |
+
gr.Markdown("# Smart Agent Evaluation Runner")
|
| 775 |
+
gr.Markdown("""
|
| 776 |
+
**Instructions:**
|
| 777 |
+
1. Clone this space, define your agent logic, tools, packages, etc.
|
| 778 |
+
2. Log in to Hugging Face.
|
| 779 |
+
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
| 780 |
+
""")
|
| 781 |
+
|
| 782 |
+
gr.LoginButton()
|
| 783 |
+
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 784 |
+
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 785 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 786 |
+
|
| 787 |
+
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
| 788 |
+
|
| 789 |
+
if __name__ == "__main__":
|
| 790 |
+
print("Launching Gradio Interface for Smart Agent Evaluation...")
|
| 791 |
+
demo.launch(debug=True, share=False)
|
| 792 |
|
| 793 |
+
# Update run_and_submit_all to use the enhanced ModularGAIAAgent
|
| 794 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 795 |
space_id = os.getenv("SPACE_ID")
|
| 796 |
if profile:
|
| 797 |
+
username = profile.username
|
| 798 |
print(f"User logged in: {username}")
|
| 799 |
else:
|
|
|
|
| 800 |
return "Please Login to Hugging Face with the button.", None
|
| 801 |
+
|
| 802 |
api_url = DEFAULT_API_URL
|
| 803 |
questions_url = f"{api_url}/questions"
|
| 804 |
submit_url = f"{api_url}/submit"
|
| 805 |
+
|
| 806 |
+
agent = ModularGAIAAgent(api_url=DEFAULT_API_URL)
|
|
|
|
|
|
|
|
|
|
| 807 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 808 |
+
|
|
|
|
| 809 |
try:
|
| 810 |
response = requests.get(questions_url, timeout=15)
|
| 811 |
response.raise_for_status()
|
| 812 |
questions_data = response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
except Exception as e:
|
| 814 |
+
return f"Error fetching questions: {e}", None
|
| 815 |
+
|
| 816 |
results_log = []
|
| 817 |
answers_payload = []
|
| 818 |
+
correct_answers = 0
|
| 819 |
+
|
| 820 |
for item in questions_data:
|
| 821 |
task_id = item.get("task_id")
|
| 822 |
question_text = item.get("question")
|
| 823 |
+
if not task_id or not question_text:
|
|
|
|
|
|
|
| 824 |
continue
|
| 825 |
+
|
| 826 |
+
submitted_answer, trace = agent.answer_question(item)
|
| 827 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 828 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer, "Reasoning Trace": "\n".join(trace)})
|
| 829 |
+
|
|
|
|
|
|
|
| 830 |
if not answers_payload:
|
|
|
|
| 831 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 832 |
+
|
| 833 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
|
|
|
|
|
|
| 834 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 835 |
+
|
| 836 |
try:
|
| 837 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 838 |
response.raise_for_status()
|
|
|
|
| 842 |
f"User: {result_data.get('username')}\n"
|
| 843 |
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 844 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 845 |
+
f"Message: {result_data.get('message', 'No message received.')}"
|
| 846 |
+
)
|
| 847 |
results_df = pd.DataFrame(results_log)
|
| 848 |
return final_status, results_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 849 |
except Exception as e:
|
| 850 |
+
return f"Submission Failed: {e}", pd.DataFrame(results_log)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|