| | import os |
| | import tempfile |
| | import gradio as gr |
| | import pandas as pd |
| | import traceback |
| | from core_agent import GAIAAgent |
| | from api_integration import GAIAApiClient |
| |
|
| | |
| | DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" |
| |
|
def save_task_file(file_content, task_id):
    """
    Persist downloaded task content to the system temp directory.

    Args:
        file_content: Raw bytes of the task attachment (falsy -> nothing saved).
        task_id: Identifier used to derive a stable file name.

    Returns:
        The path of the written file, or None when there was no content.
    """
    if not file_content:
        return None

    # Name the file after the task so repeated runs simply overwrite it.
    target = os.path.join(tempfile.gettempdir(), f"gaia_task_{task_id}.txt")

    with open(target, 'wb') as handle:
        handle.write(file_content)

    print(f"File saved to {target}")
    return target
| |
|
def get_agent_configuration():
    """
    Build the GAIAAgent keyword configuration from environment variables.

    Starts from defaults, applies xAI credentials when XAI_API_KEY is set
    (which also switches the default model), then lets individual AGENT_*
    variables override any field.

    Returns:
        dict: keyword arguments for GAIAAgent(**config).
    """
    # Baseline defaults; any of these may be overridden below.
    config = {
        "model_type": "OpenAIServerModel",
        "model_id": "gpt-4o",
        "temperature": 0.2,
        "executor_type": "local",
        "verbose": False,
        "provider": "hf-inference",
        "timeout": 120,
    }

    # xAI support: the API key switches credentials and the default model.
    # AGENT_MODEL_ID below can still override the model choice.
    if xai_api_key := os.getenv("XAI_API_KEY"):
        config["api_key"] = xai_api_key
        if xai_api_base := os.getenv("XAI_API_BASE"):
            config["api_base"] = xai_api_base
        config["model_id"] = "mixtral-8x7b-32768"

    # Generic overrides. Each variable is read once (walrus) instead of the
    # original double os.getenv() lookup per setting.
    if model_type := os.getenv("AGENT_MODEL_TYPE"):
        config["model_type"] = model_type

    if model_id := os.getenv("AGENT_MODEL_ID"):
        config["model_id"] = model_id

    if temperature := os.getenv("AGENT_TEMPERATURE"):
        config["temperature"] = float(temperature)

    if executor_type := os.getenv("AGENT_EXECUTOR_TYPE"):
        config["executor_type"] = executor_type

    # Deliberately `is not None` (not truthiness): an empty string means
    # "explicitly set, but not 'true'", i.e. verbose stays False.
    if (verbose := os.getenv("AGENT_VERBOSE")) is not None:
        config["verbose"] = verbose.lower() == "true"

    if api_base := os.getenv("AGENT_API_BASE"):
        config["api_base"] = api_base

    # InferenceClientModel-specific settings.
    if provider := os.getenv("AGENT_PROVIDER"):
        config["provider"] = provider

    if timeout := os.getenv("AGENT_TIMEOUT"):
        config["timeout"] = int(timeout)

    return config
| |
|
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all GAIA questions, run the GAIAAgent on each, submit the
    answers to the scoring server, and report the results.

    Args:
        profile: OAuth profile auto-injected by Gradio's LoginButton,
            or None when the user is not logged in.

    Returns:
        tuple: (status message string, pandas DataFrame of per-question
        results or None when nothing was run).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    print(f"User logged in: {username}")

    # Link to this Space's code so the scoring server can audit the agent.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    api_client = GAIAApiClient(DEFAULT_API_URL)

    # --- Initialize the agent from the environment-derived config ---
    try:
        agent_config = get_agent_configuration()
        print(f"Using agent configuration: {agent_config}")

        agent = GAIAAgent(**agent_config)
        print("Agent initialized successfully")
    except Exception as e:
        error_details = traceback.format_exc()
        print(f"Error initializing agent: {e}\n{error_details}")
        return f"Error initializing agent: {e}", None

    # --- Fetch the question set ---
    try:
        questions_data = api_client.get_questions()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        error_details = traceback.format_exc()
        print(f"Error fetching questions: {e}\n{error_details}")
        return f"Error fetching questions: {e}", None

    # --- Run the agent on every question ---
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    total_questions = len(questions_data)
    completed = 0  # questions attempted (includes those that later fail)
    failed = 0     # questions where the agent raised an exception

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            completed += 1
            print(f"Processing question {completed}/{total_questions}: Task ID {task_id}")

            # Some tasks ship an attachment. A missing file is expected for
            # many tasks, so download errors are logged and ignored.
            file_path = None
            try:
                file_content = api_client.get_file(task_id)
                print(f"Downloaded file for task {task_id}")
                file_path = save_task_file(file_content, task_id)
            except Exception as file_e:
                print(f"No file found for task {task_id} or error: {file_e}")

            submitted_answer = agent.answer_question(question_text, file_path)

            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer
            })
        except Exception as e:
            # Submit an explicit error marker rather than dropping the task,
            # so the scoreboard reflects every question attempted.
            failed += 1
            error_details = traceback.format_exc()
            print(f"Error running agent on task {task_id}: {e}\n{error_details}")

            error_msg = f"AGENT ERROR: {e}"
            answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": error_msg
            })

    print(f"\nProcessing complete: {completed} questions processed, {failed} failures")

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # --- Submit answers ---
    # (A dead `submission_data` dict was removed here: submit_answers takes
    # the username, agent_code and payload as separate arguments.)
    print(f"Submitting {len(answers_payload)} answers for username '{username}'...")

    try:
        result_data = api_client.submit_answers(
            username.strip(),
            agent_code,
            answers_payload
        )

        correct_count = result_data.get('correct_count', 0)
        total_attempted = result_data.get('total_attempted', len(answers_payload))
        # Guard against division by zero if the server reports 0 attempts.
        success_rate = (correct_count / total_attempted) * 100 if total_attempted > 0 else 0

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({correct_count}/{total_attempted} correct, {success_rate:.1f}% success rate)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )

        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        error_details = traceback.format_exc()
        status_message = f"Submission Failed: {e}\n{error_details}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
| |
|
| | |
# Gradio UI: a login button plus a single action that runs the whole
# fetch -> answer -> submit pipeline and displays the outcome.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        **Configuration:**

        You can configure the agent by setting these environment variables:
        - `AGENT_MODEL_TYPE`: Model type (HfApiModel, InferenceClientModel, LiteLLMModel, OpenAIServerModel)
        - `AGENT_MODEL_ID`: Model ID
        - `AGENT_TEMPERATURE`: Temperature for generation (0.0-1.0)
        - `AGENT_EXECUTOR_TYPE`: Type of executor ('local' or 'e2b')
        - `AGENT_VERBOSE`: Enable verbose logging (true/false)
        - `AGENT_API_BASE`: Base URL for API calls (for OpenAIServerModel)

        **xAI Support:**
        - `XAI_API_KEY`: Your xAI API key
        - `XAI_API_BASE`: Base URL for xAI API (default: https://api.groq.com/openai/v1)
        - When using xAI, set AGENT_MODEL_TYPE=OpenAIServerModel and AGENT_MODEL_ID=mixtral-8x7b-32768

        **InferenceClientModel specific settings:**
        - `AGENT_PROVIDER`: Provider for InferenceClientModel (e.g., "hf-inference")
        - `AGENT_TIMEOUT`: Timeout in seconds for API calls
        """
    )

    # Hugging Face OAuth login; Gradio auto-injects the resulting
    # gr.OAuthProfile into run_and_submit_all's typed `profile` parameter.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Outputs: a free-text status box and a table of per-question answers.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
| |
|
if __name__ == "__main__":
    # Banner makes app restarts easy to spot in the Space logs.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # Echo the effective agent configuration before serving requests.
    startup_config = get_agent_configuration()
    print(f"Agent configuration: {startup_config}")

    # Local debug server; sharing is disabled (the Space hosts the app).
    demo.launch(debug=True, share=False)
| |
|