# gaia-agent / evaluation_app.py
# Supan23's picture
# Upload 11 files
# e6d5e51 verified
import os
import gradio as gr
import requests
import pandas as pd
import time
from langchain_core.messages import HumanMessage
from agent import build_graph
# Constants
# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# paths are appended to it when fetching questions and submitting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class BasicAgent:
    """Callable agent that answers one question by invoking the graph from ``build_graph``.

    The instance is called like a function: ``agent(question)`` returns the
    cleaned text of the last message produced by the graph, or a string of the
    form ``"Error: <reason>"`` when the graph invocation fails.
    """

    # NOTE(review): the symbol prefixes in the printed messages below look
    # mis-encoded (mojibake). They are left untouched here - confirm the file's
    # encoding before changing them.

    def __init__(self):
        print("πŸ€– BasicAgent initialized.")
        self.graph = build_graph()

    @staticmethod
    def _clean_answer(content) -> str:
        """Turn raw message content into a trimmed plain-text answer.

        Args:
            content: The ``content`` of the final message. Usually a string;
                LangChain also allows a list of content blocks (plain strings
                or dicts carrying a ``"text"`` entry), which is flattened here
                instead of failing on ``.strip()``.

        Returns:
            The text with surrounding whitespace and a leading
            ``"Assistant: "`` marker removed. ``None`` yields ``""``.
        """
        prefix = "Assistant: "

        if content is None:
            text = ""
        elif isinstance(content, str):
            text = content
        elif isinstance(content, (list, tuple)):
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    # Non-text blocks (images, tool calls, ...) carry no answer text.
                    parts.append(block["text"])
            text = "".join(parts)
        else:
            text = str(content)

        text = text.strip()
        if text.startswith(prefix):
            # Derive the slice from the prefix itself rather than a magic
            # number, and drop any whitespace that followed the marker.
            text = text[len(prefix):].strip()
        return text

    def __call__(self, question: str) -> str:
        """Run the graph on *question* and return the cleaned answer.

        Args:
            question: The question text, sent to the graph as a single
                ``HumanMessage``.

        Returns:
            The cleaned content of the last message in the graph result, or
            ``"Error: <reason>"`` if building the message, invoking the graph
            or reading the result raises.
        """
        print(f"πŸ€” Processing: {question[:50]}...")
        try:
            messages = [HumanMessage(content=question)]
            result = self.graph.invoke({"messages": messages})
            answer = self._clean_answer(result["messages"][-1].content)
            print(f"βœ… Answer: {answer}")
            return answer
        except Exception as e:
            # Best-effort boundary: the failure is reported as the answer text
            # so that one bad question does not stop the caller's loop.
            print(f"❌ Error: {e}")
            return f"Error: {str(e)}"
def run_evaluation(profile):
if not profile:
return "❌ Please login with HuggingFace first!", None
print(f"πŸš€ Starting evaluation for user: {profile.username}")
try:
# Initialize agent
agent = BasicAgent()
print("βœ… Agent initialized successfully")
# Get questions
questions_url = f"{DEFAULT_API_URL}/questions"
print(f"πŸ“₯ Fetching questions from: {questions_url}")
response = requests.get(questions_url, timeout=30)
questions = response.json()
print(f"πŸ“‹ Got {len(questions)} questions")
# Process questions
answers = []
results = []
for i, q in enumerate(questions): # Run on all questions
task_id = q.get("task_id")
question_text = q.get("question")
print(f"\nπŸ”„ Question {i+1}/{len(questions)}: {task_id}")
try:
answer = agent(question_text)
answers.append({"task_id": task_id, "submitted_answer": answer})
results.append({
"Task ID": task_id,
"Question": question_text[:100] + "...",
"Answer": answer
})
time.sleep(5) # Increased delay
except Exception as e:
print(f"❌ Error on question {task_id}: {e}")
results.append({
"Task ID": task_id,
"Question": question_text[:100] + "...",
"Answer": f"ERROR: {e}"
})
# Submit answers
space_id = os.getenv("SPACE_ID", "Supan23/gaia-agent")
submit_data = {
"username": profile.username,
"agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
"answers": answers
}
submit_url = f"{DEFAULT_API_URL}/submit"
print(f"πŸ“€ Submitting to: {submit_url}")
response = requests.post(submit_url, json=submit_data, timeout=60)
result = response.json()
status = f"""πŸŽ‰ EVALUATION COMPLETE!
πŸ‘€ User: {result.get('username')}
πŸ“Š Score: {result.get('score', 0)}%
βœ… Correct: {result.get('correct_count', 0)}/{result.get('total_attempted', 0)}
πŸ’¬ Message: {result.get('message', 'No message')}
{'πŸ† CERTIFICATE ELIGIBLE!' if result.get('score', 0) >= 30 else 'πŸ“ˆ Need 30% for certificate'}
"""
return status, pd.DataFrame(results)
except Exception as e:
error_msg = f"❌ Evaluation failed: {str(e)}"
print(error_msg)
return error_msg, None
# Build the Gradio page: heading, instructions, login button, run button,
# and the two output widgets fed by run_evaluation.
# NOTE(review): the symbol prefixes in the labels below look mis-encoded
# (mojibake). They are reproduced unchanged - confirm the file's encoding.
with gr.Blocks() as demo:
    gr.Markdown(value="# πŸŽ“ GAIA Agent - Certificate Test")
    gr.Markdown(value="**Click Login, then click Run Test to get your certificate!**")
    gr.LoginButton()
    run_btn = gr.Button(value="πŸš€ Run Certificate Test", variant="primary")
    status = gr.Textbox(lines=10, label="πŸ“Š Results")
    table = gr.DataFrame(label="πŸ“‹ Questions & Answers")
    # The handler takes no input components; its two return values go to the
    # status box and the results table, in that order.
    run_btn.click(fn=run_evaluation, inputs=None, outputs=[status, table])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()