"""Gradio app that runs GaiaAgent on the scoring server's questions and submits the answers."""

import os
import time

import gradio as gr
import pandas as pd
import requests

# Import your newly refactored smolagents GaiaAgent
from agent import GaiaAgent

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Global placeholder layout to keep the UI dataframe stable during initialization steps
COLUMNS = ["Task ID", "Question", "Submitted Answer"]
INITIAL_DF = pd.DataFrame(columns=COLUMNS)

# Network timeouts (seconds) for the two scoring-server calls.
QUESTIONS_TIMEOUT_SECONDS = 15
SUBMIT_TIMEOUT_SECONDS = 60

# Pause (seconds) between consecutive agent runs so the model backend is not hammered.
INTER_QUESTION_DELAY_SECONDS = 2

# Upper bound on how much of a server error body is echoed into the status box.
MAX_ERROR_BODY_CHARS = 500


def _results_frame(results_log):
    """Return the per-question log as a DataFrame with the fixed UI columns."""
    return pd.DataFrame(results_log, columns=COLUMNS)


def _server_error_body(error):
    """Return the (truncated) response body attached to a requests error, or ""."""
    response = error.response
    # Compare against None explicitly: a requests.Response is falsy for 4xx/5xx
    # statuses, which is exactly the case we want to report on.
    if response is None:
        return ""
    return response.text.strip()[:MAX_ERROR_BODY_CHARS]


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the GaiaAgent on them, and submit the answers.

    This is a generator: every ``yield`` produces a ``(status_text, DataFrame)``
    pair so the Gradio UI can show progress while the run is still going.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the user
            is not logged in. Only ``profile.username`` is read.

    Yields:
        Tuples of a status message and a DataFrame with columns ``COLUMNS``
        holding the answers collected so far.
    """
    if not profile:
        yield "Please Login to Hugging Face with the button.", INITIAL_DF
        return

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    yield "Initializing Agent...", INITIAL_DF

    # 1. Instantiate Agent
    try:
        agent = GaiaAgent()
    except Exception as e:
        yield f"Error initializing agent: {e}", INITIAL_DF
        return

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run"

    # 2. Fetch Questions
    yield "Fetching questions from server...", INITIAL_DF
    try:
        response = requests.get(questions_url, timeout=QUESTIONS_TIMEOUT_SECONDS)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        yield f"An unexpected error occurred fetching questions: {e}", INITIAL_DF
        return

    # Reject anything that is not a non-empty list; the loop below relies on
    # iterating a sequence of dict-like question records.
    if not isinstance(questions_data, list) or not questions_data:
        yield "Fetched questions list is empty or invalid format.", INITIAL_DF
        return

    total_questions = len(questions_data)
    results_log = []
    answers_payload = []

    # 3. Run Agent (Streaming Updates via Yield)
    for i, item in enumerate(questions_data, 1):
        # Skip malformed entries instead of crashing the whole run on `.get`.
        if not isinstance(item, dict):
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue

        yield (
            f"Processing question {i}/{total_questions} (Task ID: {task_id})...",
            _results_frame(results_log),
        )

        try:
            submitted_answer = agent(task_id, question_text)
        except Exception as e:
            # A failing question is shown in the table but is not submitted.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })
        else:
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            })

        # Yield updated table immediately so the user sees real-time progress
        yield f"Completed {i}/{total_questions}. Moving to next...", _results_frame(results_log)

        # Small buffer to prevent overwhelming the server between consecutive model
        # inquiries; pointless after the final question, so skip it there.
        if i < total_questions:
            time.sleep(INTER_QUESTION_DELAY_SECONDS)

    if not answers_payload:
        yield "Agent did not produce any answers to submit.", _results_frame(results_log)
        return

    # 4. Prepare & Submit
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    yield (
        f"Agent finished. Submitting {len(answers_payload)} answers...",
        _results_frame(results_log),
    )

    try:
        response = requests.post(submit_url, json=submission_data, timeout=SUBMIT_TIMEOUT_SECONDS)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"🎉 Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}%\n"
            f"Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
    except requests.exceptions.HTTPError as e:
        # Surface what the server said, not just the status line.
        final_status = f"Submission Failed: {e}"
        server_body = _server_error_body(e)
        if server_body:
            final_status += f"\nServer response: {server_body}"
    except Exception as e:
        final_status = f"Submission Failed: {e}"

    yield final_status, _results_frame(results_log)


# --- Build Gradio Interface using Blocks ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions and run your agent.
        3. Watch the table update in real-time as your agent processes each question!
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
    results_table = gr.DataFrame(value=INITIAL_DF, label="Questions and Agent Answers", wrap=True)

    # Gradio handles the gr.OAuthProfile backend injection natively when clicked
    run_button.click(
        fn=run_and_submit_all,
        inputs=None,
        outputs=[status_output, results_table],
    )

if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?).")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)