| import os |
| import gradio as gr |
| import requests |
| import pandas as pd |
| from agent import GaiaAgent |
|
|
# Base URL of the scoring service. The code in this file appends
# "/questions", "/files/<task_id>" and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
def _load_task_file(task_id, file_name):
    """Download the attachment of *task_id* and return it as text for the agent.

    Returns:
        - up to 5000 characters of the file when its bytes decode as UTF-8;
        - a short ``[Binary file: ...]`` placeholder when they do not;
        - an empty string when the request fails or the status is not 200.
    """
    print(f" fetching file: {file_name}...")
    try:
        file_resp = requests.get(
            f"{DEFAULT_API_URL}/files/{task_id}",
            timeout=30,
        )
        if file_resp.status_code != 200:
            print(f" file fetch returned {file_resp.status_code} (skipping)")
            return ""
        raw = file_resp.content
    except Exception as e:
        # Best effort: a missing attachment must not abort the whole run.
        print(f" error fetching file: {e}")
        return ""

    # Decode the raw bytes strictly instead of relying on `Response.text`:
    # `.text` substitutes undecodable bytes rather than raising, so it can
    # never signal that the payload is binary.
    try:
        text = raw.decode("utf-8")
    except UnicodeDecodeError:
        print(" loaded binary file")
        return f"[Binary file: {file_name}, {len(raw)} bytes]"

    file_content = text[:5000]
    print(f" loaded {len(file_content)} chars from file")
    return file_content


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on each one and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per answered question, or ``None``
        when the run stopped before any question was processed (no login,
        questions could not be fetched, agent could not be created).
    """
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID")

    print(f"\n[run_and_submit_all] starting for user: {username}")

    # --- 1. Fetch the questions -------------------------------------------
    print("[run_and_submit_all] fetching questions from API...")
    try:
        questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        questions_resp.raise_for_status()
        questions = questions_resp.json()
    except Exception as e:
        error_msg = f"Error fetching questions: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # The loop below needs a non-empty list of dicts; anything else would
    # either crash later or lead to an empty submission.
    if not isinstance(questions, list) or not questions:
        error_msg = "Error fetching questions: empty or invalid question list"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None
    print(f"[run_and_submit_all] ✓ fetched {len(questions)} questions")

    # --- 2. Create the agent ----------------------------------------------
    print("[run_and_submit_all] initializing agent...")
    try:
        agent = GaiaAgent()
    except Exception as e:
        error_msg = f"Error initializing agent: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # --- 3. Answer every question -----------------------------------------
    results_log = []
    answers_payload = []
    total = len(questions)

    for i, item in enumerate(questions, start=1):
        task_id = item.get("task_id") if isinstance(item, dict) else None
        question = item.get("question") if isinstance(item, dict) else None
        if not task_id or question is None:
            # Without both fields there is nothing to ask or to submit.
            print(f"\n[run_and_submit_all] [{i}/{total}] skipping malformed item: {item}")
            continue

        file_name = item.get("file_name", "")
        task_label = str(task_id)
        question_text = str(question)

        print(f"\n[run_and_submit_all] [{i}/{total}] task_id={task_id}")
        print(f" question: {question_text[:80]}...")
        print(f" file: {file_name if file_name else '(none)'}")

        file_content = _load_task_file(task_id, file_name) if file_name else ""

        try:
            answer = agent(question, file_content=file_content)
        except Exception as e:
            print(f" error running agent: {e}")
            answer = "I am unable to answer"

        # The agent's return type is not under our control; normalise it so
        # the preview slicing below and the JSON payload always see a string.
        answer = "" if answer is None else str(answer)

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_label[:8] + "...",
            "Question": question_text[:60] + "...",
            "Answer": answer[:60] + "..." if len(answer) > 60 else answer,
        })

    if not answers_payload:
        error_msg = "❌ Submission failed: no answers were produced"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)

    # --- 4. Submit ---------------------------------------------------------
    print(f"\n[run_and_submit_all] submitting {len(answers_payload)} answers...")
    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local",
        "answers": answers_payload,
    }

    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()
    except Exception as e:
        error_msg = f"❌ Submission failed: {str(e)[:200]}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)

    status_msg = (
        f"✅ Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score', 'N/A')}% "
        f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
        f"Message: {result.get('message', 'No message')}"
    )
    print(status_msg)
    return status_msg, pd.DataFrame(results_log)
|
|
|
|
| |
# ---------------------------------------------------------------------------
# Gradio UI: intro text, login button, one action button, and two outputs
# (status text + per-question results table).
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent — Mistral")
    gr.Markdown("""
    **How it works:**
    1. Click "Login with Hugging Face"
    2. Click "Run Evaluation"
    3. Agent processes all 20 questions
    4. See your score instantly!

    **Features:**
    - Uses Mistral model via Groq API
    - Web search via DuckDuckGo (free, no keys)
    - Fetches files from GAIA API
    - Automatic answer submission
    """)

    gr.LoginButton()
    run_btn = gr.Button(
        "Run Evaluation & Submit",
        variant="primary",
        size="lg",
    )

    # Read-only text area for the status message returned by the callback.
    status_output = gr.Textbox(
        label="Status / Result",
        interactive=False,
        lines=5,
    )
    # Table fed by the DataFrame returned by the callback.
    results_table = gr.DataFrame(wrap=True, label="Results")

    # The callback declares no explicit inputs; its two return values map
    # onto the two output components, in order.
    run_btn.click(
        outputs=[status_output, results_table],
        fn=run_and_submit_all,
    )


if __name__ == "__main__":
    demo.launch(share=False, debug=True)