# File size: 5,216 Bytes
import os
import gradio as gr
import requests
import pandas as pd
from agent import GaiaAgent
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def _fetch_task_file(task_id, file_name):
    """Download the attachment of one task and return it as text for the agent.

    Args:
        task_id: Identifier used to build the ``/files/{task_id}`` URL.
        file_name: Name reported by the questions API; only used in log
            lines and in the binary placeholder.

    Returns:
        Up to the first 5000 characters of the file when it decodes as text,
        a ``[Binary file: ...]`` placeholder when it does not, or ``""`` when
        the download fails or the server answers with a non-200 status.
    """
    try:
        print(f" fetching file: {file_name}...")
        file_resp = requests.get(
            f"{DEFAULT_API_URL}/files/{task_id}",
            timeout=30,
        )
    except Exception as e:
        # Best effort: a missing attachment must not abort the whole run.
        print(f" error fetching file: {e}")
        return ""

    if file_resp.status_code != 200:
        print(f" file fetch returned {file_resp.status_code} (skipping)")
        return ""

    raw = file_resp.content
    try:
        # Decode strictly. `Response.text` replaces undecodable bytes instead
        # of raising, so it can never signal that the payload is binary.
        text = raw.decode(file_resp.encoding or "utf-8")
    except (UnicodeDecodeError, LookupError):
        print(" loaded binary file")
        return f"[Binary file: {file_name}, {len(raw)} bytes]"

    file_content = text[:5000]
    print(f" loaded {len(file_content)} chars from file")
    return file_content


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch questions from the GAIA API, run the agent, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per answered question, or ``None``
        when the run stops before any question is processed (no login,
        question download failure, agent initialisation failure).
    """
    # Check login
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID")
    print(f"\n[run_and_submit_all] starting for user: {username}")

    # Fetch questions from API
    print("[run_and_submit_all] fetching questions from API...")
    try:
        questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        questions_resp.raise_for_status()
        questions = questions_resp.json()
    except Exception as e:
        error_msg = f"Error fetching questions: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # Reject payloads the loop below cannot iterate as a list of question dicts.
    if not isinstance(questions, list) or not questions:
        error_msg = "Fetched questions list is empty or invalid format."
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None
    print(f"[run_and_submit_all] ✓ fetched {len(questions)} questions")

    # Initialize agent
    print("[run_and_submit_all] initializing agent...")
    try:
        agent = GaiaAgent()
    except Exception as e:
        error_msg = f"Error initializing agent: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # Run agent on each question
    results_log = []
    answers_payload = []
    for i, item in enumerate(questions):
        is_record = isinstance(item, dict)
        task_id = item.get("task_id") if is_record else None
        question = item.get("question") if is_record else None
        # Skip malformed entries instead of crashing on the slicing below.
        if not task_id or not isinstance(question, str):
            print(f"\n[run_and_submit_all] [{i+1}/{len(questions)}] skipping malformed item: {item!r}")
            continue
        file_name = item.get("file_name", "")

        print(f"\n[run_and_submit_all] [{i+1}/{len(questions)}] task_id={task_id}")
        print(f" question: {question[:80]}...")
        print(f" file: {file_name if file_name else '(none)'}")

        # Try to fetch file if it exists
        file_content = _fetch_task_file(task_id, file_name) if file_name else ""

        # Run agent
        try:
            answer = agent(question, file_content=file_content)
        except Exception as e:
            print(f" error running agent: {e}")
            answer = "I am unable to answer"

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})

        # The preview is built from a string copy so a non-string answer
        # (for example a number) cannot break the results table.
        answer_text = str(answer)
        results_log.append({
            "Task ID": str(task_id)[:8] + "...",
            "Question": question[:60] + "...",
            "Answer": answer_text[:60] + "..." if len(answer_text) > 60 else answer_text,
        })

    if not answers_payload:
        error_msg = "Agent did not produce any answers to submit."
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, pd.DataFrame(results_log)

    # Submit answers
    print(f"\n[run_and_submit_all] submitting {len(answers_payload)} answers...")
    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local",
        "answers": answers_payload,
    }
    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()
        status_msg = (
            f"✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message')}"
        )
        print(status_msg)
        return status_msg, pd.DataFrame(results_log)
    except Exception as e:
        # Still return the per-question log so the user can inspect answers.
        error_msg = f"❌ Submission failed: {str(e)[:200]}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)
# Gradio UI
with gr.Blocks() as demo:
    # Title, then usage steps and a feature summary rendered as Markdown.
    gr.Markdown(value="# GAIA Agent — Mistral")
    gr.Markdown(
        value="""
**How it works:**
1. Click "Login with Hugging Face"
2. Click "Run Evaluation"
3. Agent processes all 20 questions
4. See your score instantly!
**Features:**
- Uses Mistral model via Groq API
- Web search via DuckDuckGo (free, no keys)
- Fetches files from GAIA API
- Automatic answer submission
"""
    )

    # Login control, followed by the button that starts the evaluation.
    gr.LoginButton()
    run_btn = gr.Button(value="Run Evaluation & Submit", variant="primary", size="lg")

    # Output widgets: status text and the per-question results table.
    status_output = gr.Textbox(lines=5, label="Status / Result", interactive=False)
    results_table = gr.DataFrame(wrap=True, label="Results")

    # The handler's two return values map onto the two outputs, in order.
    run_btn.click(
        run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    # Start the Gradio server only when run as a script; debug output is
    # enabled and no public share link is requested.
    demo.launch(debug=True, share=False)