"""Gradio front end that runs GaiaAgent on the scoring API's questions and submits the answers."""

import os

import gradio as gr
import pandas as pd
import requests

from agent import GaiaAgent

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Upper bound on how much of an attached file's text is handed to the agent.
MAX_FILE_CHARS = 5000


def _truncate(value, limit):
    """Return ``str(value)`` cut to ``limit`` characters, with "..." appended only if it was cut."""
    text = str(value)
    return text[:limit] + "..." if len(text) > limit else text


def _fetch_file_content(task_id, file_name):
    """Download the file attached to ``task_id`` and return it in a form the agent can read.

    Returns up to ``MAX_FILE_CHARS`` characters of text when the payload is valid
    UTF-8, a short ``[Binary file: ...]`` placeholder when it is not, and an empty
    string when the download fails or the API answers with a non-200 status.
    """
    print(f" fetching file: {file_name}...")
    try:
        file_resp = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
    except requests.RequestException as e:
        # Best effort: a missing attachment must not abort the whole run.
        print(f" error fetching file: {e}")
        return ""

    if file_resp.status_code != 200:
        print(f" file fetch returned {file_resp.status_code} (skipping)")
        return ""

    raw = file_resp.content
    try:
        # Decode strictly. ``Response.text`` never raises (it substitutes
        # replacement characters), so it cannot be used to detect binary data.
        text = raw.decode("utf-8")
    except UnicodeDecodeError:
        print(" loaded binary file")
        return f"[Binary file: {file_name}, {len(raw)} bytes]"

    file_content = text[:MAX_FILE_CHARS]
    print(f" loaded {len(file_content)} chars from file")
    return file_content


def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Fetch the questions, run the agent on each one, and submit all answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with a
        preview of every processed question and answer, or ``None`` when the
        run stopped before any question was processed.
    """
    # Check login
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID")
    print(f"\n[run_and_submit_all] starting for user: {username}")

    # Fetch questions from API
    print("[run_and_submit_all] fetching questions from API...")
    try:
        questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        questions_resp.raise_for_status()
        questions = questions_resp.json()
    except (requests.RequestException, ValueError) as e:
        error_msg = f"Error fetching questions: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    if not isinstance(questions, list) or not questions:
        error_msg = "Fetched questions list is empty or invalid format."
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None
    print(f"[run_and_submit_all] ✓ fetched {len(questions)} questions")

    # Initialize agent
    print("[run_and_submit_all] initializing agent...")
    try:
        agent = GaiaAgent()
    except Exception as e:  # boundary: report any agent start-up failure in the UI
        error_msg = f"Error initializing agent: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # Run agent on each question
    results_log = []
    answers_payload = []
    total = len(questions)
    for i, item in enumerate(questions, start=1):
        task_id = item.get("task_id")
        question = item.get("question")
        file_name = item.get("file_name", "")

        # A malformed item must not take down the whole evaluation.
        if not task_id or question is None:
            print(f"\n[run_and_submit_all] [{i}/{total}] skipping item with missing task_id or question")
            continue

        print(f"\n[run_and_submit_all] [{i}/{total}] task_id={task_id}")
        print(f" question: {_truncate(question, 80)}")
        print(f" file: {file_name if file_name else '(none)'}")

        file_content = _fetch_file_content(task_id, file_name) if file_name else ""

        # Run agent
        try:
            answer = agent(question, file_content=file_content)
        except Exception as e:  # boundary: the agent may raise anything
            print(f" error running agent: {e}")
            answer = "I am unable to answer"

        # The answer is submitted exactly as the agent returned it; only the
        # on-screen preview is coerced to text and shortened.
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": _truncate(task_id, 8),
            "Question": _truncate(question, 60),
            "Answer": _truncate(answer, 60),
        })

    if not answers_payload:
        error_msg = "Agent did not produce any answers to submit."
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, pd.DataFrame(results_log)

    # Submit answers
    print(f"\n[run_and_submit_all] submitting {len(answers_payload)} answers...")
    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local",
        "answers": answers_payload,
    }

    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        error_msg = f"❌ Submission failed: {str(e)[:200]}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)

    status_msg = (
        "✅ Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score', 'N/A')}% "
        f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
        f"Message: {result.get('message', 'No message')}"
    )
    print(status_msg)
    return status_msg, pd.DataFrame(results_log)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent — Mistral")
    gr.Markdown("""
    **How it works:**
    1. Click "Login with Hugging Face"
    2. Click "Run Evaluation"
    3. Agent processes all 20 questions
    4. See your score instantly!

    **Features:**
    - Uses Mistral model via Groq API
    - Web search via DuckDuckGo (free, no keys)
    - Fetches files from GAIA API
    - Automatic answer submission
    """)

    gr.LoginButton()

    run_btn = gr.Button("Run Evaluation & Submit", size="lg", variant="primary")
    status_output = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)

    # No explicit inputs: only outputs are wired to the click handler here.
    # NOTE(review): presumably Gradio supplies `profile` from the
    # gr.OAuthProfile annotation — confirm against the Gradio OAuth docs.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    demo.launch(debug=True, share=False)