mohammedff5642's picture
Update app.py
d634beb verified
Raw
History Blame Contribute Delete
5.22 kB
import os
import gradio as gr
import requests
import pandas as pd
from agent import GaiaAgent
# Base URL of the scoring service. The code in this file appends the
# /questions, /files/<task_id> and /submit paths to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def _clip(value, limit):
    """Return ``str(value)`` cut to ``limit`` characters, adding "..." only if cut."""
    text = str(value)
    return text[:limit] + "..." if len(text) > limit else text


def _fetch_task_file(task_id, file_name):
    """Download the attachment of ``task_id`` and return it as text for the agent.

    Returns the first 5000 characters when the payload decodes as text, a
    ``[Binary file: ...]`` placeholder when it does not, and ``""`` when the
    download fails or the server answers with a non-200 status. Never raises
    for network errors: a missing attachment must not abort the whole run.
    """
    print(f" fetching file: {file_name}...")
    try:
        file_resp = requests.get(
            f"{DEFAULT_API_URL}/files/{task_id}",
            timeout=30,
        )
    except requests.RequestException as e:
        print(f" error fetching file: {e}")
        return ""

    if file_resp.status_code != 200:
        print(f" file fetch returned {file_resp.status_code} (skipping)")
        return ""

    raw = file_resp.content
    try:
        # Decode strictly. ``Response.text`` substitutes replacement characters
        # instead of raising, so it can never signal that a payload is binary.
        text = raw.decode(file_resp.encoding or "utf-8")
    except (UnicodeDecodeError, LookupError):
        print(" loaded binary file")
        return f"[Binary file: {file_name}, {len(raw)} bytes]"

    file_content = text[:5000]
    print(f" loaded {len(file_content)} chars from file")
    return file_content


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch questions from GAIA API, run agent, submit answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per answered question, or ``None``
        when the run stopped before any question was processed.

    Every failure (not logged in, network error, malformed payload, agent
    construction or execution error) is reported through the status message
    rather than raised, so the UI always receives a value for both outputs.
    """
    # Check login
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID")
    print(f"\n[run_and_submit_all] starting for user: {username}")

    # Fetch questions from API
    print("[run_and_submit_all] fetching questions from API...")
    try:
        questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        questions_resp.raise_for_status()
        questions = questions_resp.json()
    except Exception as e:  # UI boundary: report instead of crashing the callback
        error_msg = f"Error fetching questions: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # An empty or non-list payload would otherwise lead to an empty submission.
    if not isinstance(questions, list) or not questions:
        error_msg = "Fetched questions list is empty or invalid format."
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None
    print(f"[run_and_submit_all] ✓ fetched {len(questions)} questions")

    # Initialize agent
    print("[run_and_submit_all] initializing agent...")
    try:
        agent = GaiaAgent()
    except Exception as e:  # UI boundary: report instead of crashing the callback
        error_msg = f"Error initializing agent: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # Run agent on each question
    results_log = []
    answers_payload = []
    total = len(questions)
    for i, item in enumerate(questions, start=1):
        if not isinstance(item, dict):
            print(f"\n[run_and_submit_all] [{i}/{total}] skipping malformed item: {item!r}")
            continue

        task_id = item.get("task_id")
        question = item.get("question")
        file_name = item.get("file_name", "")

        # Without both fields there is nothing to ask or to submit against.
        if not task_id or not isinstance(question, str):
            print(f"\n[run_and_submit_all] [{i}/{total}] skipping item with missing task_id or question")
            continue

        print(f"\n[run_and_submit_all] [{i}/{total}] task_id={task_id}")
        print(f" question: {question[:80]}...")
        print(f" file: {file_name if file_name else '(none)'}")

        # Try to fetch file if it exists
        file_content = _fetch_task_file(task_id, file_name) if file_name else ""

        # Run agent
        try:
            answer = agent(question, file_content=file_content)
        except Exception as e:  # one failing question must not abort the run
            print(f" error running agent: {e}")
            answer = "I am unable to answer"

        # The payload and the table both need a string.
        if answer is None:
            answer = "I am unable to answer"
        elif not isinstance(answer, str):
            answer = str(answer)

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": _clip(task_id, 8),
            "Question": _clip(question, 60),
            "Answer": _clip(answer, 60),
        })

    if not answers_payload:
        error_msg = "Agent did not produce any answers to submit."
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, pd.DataFrame(results_log)

    # Submit answers
    print(f"\n[run_and_submit_all] submitting {len(answers_payload)} answers...")
    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local",
        "answers": answers_payload,
    }
    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()
        status_msg = (
            f"✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message')}"
        )
        print(status_msg)
        return status_msg, pd.DataFrame(results_log)
    except Exception as e:  # UI boundary: still show the per-question results
        error_msg = f"❌ Submission failed: {str(e)[:200]}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)
# ---------------------------------------------------------------------------
# Gradio UI: a login button, a single action button, and two output widgets
# (status text + per-question results table) fed by run_and_submit_all.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # Header and usage notes
    gr.Markdown("# GAIA Agent — Mistral")
    gr.Markdown("""
**How it works:**
1. Click "Login with Hugging Face"
2. Click "Run Evaluation"
3. Agent processes all 20 questions
4. See your score instantly!
**Features:**
- Uses Mistral model via Groq API
- Web search via DuckDuckGo (free, no keys)
- Fetches files from GAIA API
- Automatic answer submission
""")

    # Controls
    gr.LoginButton()
    run_btn = gr.Button(
        "Run Evaluation & Submit",
        variant="primary",
        size="lg",
    )

    # Outputs
    status_output = gr.Textbox(
        label="Status / Result",
        interactive=False,
        lines=5,
    )
    results_table = gr.DataFrame(label="Results", wrap=True)

    # The callback declares no input components; it returns one value per
    # output component, in this order.
    run_btn.click(run_and_submit_all, outputs=[status_output, results_table])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=False, debug=True)