Spaces:

Supan23
/

gaia-agent

Sleeping

App Files Files Community

gaia-agent / evaluation_app.py

Supan23

Upload 11 files

e6d5e51 verified 7 months ago

raw

history blame contribute delete

4.23 kB

	import os
	import gradio as gr
	import requests
	import pandas as pd
	import time
	from langchain_core.messages import HumanMessage
	from agent import build_graph

	# Constants
	# Base URL of the remote API used by run_evaluation, which appends
	# "/questions" (GET) and "/submit" (POST) to it.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	class BasicAgent:
	    """Callable wrapper that answers one question using the graph from ``build_graph``.

	    Calling an instance never raises for ordinary failures: any exception
	    raised while processing is logged and returned as an ``"Error: ..."`` string.
	    """

	    # Label removed from the start of an answer when the model echoes it.
	    _ANSWER_PREFIX = "Assistant: "

	    def __init__(self):
	        """Build the underlying graph once so it is reused for every question."""
	        print("🤖 BasicAgent initialized.")
	        self.graph = build_graph()

	    @staticmethod
	    def _content_to_text(content):
	        """Flatten message content into a plain string.

	        String content is returned unchanged. List content is reduced to the
	        concatenation of its plain strings and ``{"type": "text"}`` parts;
	        anything else falls back to ``str(content)``.
	        """
	        if isinstance(content, str):
	            return content
	        if isinstance(content, list):
	            parts = []
	            for item in content:
	                if isinstance(item, str):
	                    parts.append(item)
	                elif isinstance(item, dict) and item.get("type") == "text":
	                    parts.append(str(item.get("text", "")))
	            return "".join(parts)
	        return str(content)

	    def __call__(self, question: str) -> str:
	        """Return the agent's answer to ``question``.

	        Args:
	            question: The question text passed to the graph as a human message.

	        Returns:
	            The content of the last message produced by the graph, stripped of
	            surrounding whitespace and of a leading ``"Assistant: "`` label, or
	            ``"Error: <message>"`` if anything fails.
	        """
	        try:
	            # The preview slice lives inside the try so that a non-string
	            # question is reported like every other failure instead of raising.
	            print(f"🤔 Processing: {question[:50]}...")
	            messages = [HumanMessage(content=question)]
	            result = self.graph.invoke({"messages": messages})
	            answer = self._content_to_text(result['messages'][-1].content)

	            # Simple cleaning
	            answer = answer.strip()
	            if answer.startswith(self._ANSWER_PREFIX):
	                # Slice by the prefix length rather than a hard-coded number.
	                answer = answer[len(self._ANSWER_PREFIX):]

	            print(f"✅ Answer: {answer}")
	            return answer
	        except Exception as e:
	            print(f"❌ Error: {e}")
	            return f"Error: {str(e)}"

	def run_evaluation(profile: "gr.OAuthProfile | None"):
	    """Run the agent on every question from the API and submit the answers.

	    The ``profile`` annotation is what lets Gradio inject the logged-in user's
	    OAuth profile (or ``None`` when nobody is logged in) into an event handler
	    that is wired without explicit inputs. It is written as a string so it is
	    not evaluated when the function is defined.

	    Args:
	        profile: Object exposing ``username`` for the logged-in user, or a
	            falsy value when the user has not logged in.

	    Returns:
	        A ``(status, table)`` tuple: ``status`` is the text shown to the user
	        and ``table`` is a ``pandas.DataFrame`` with one row per processed
	        question, or ``None`` when no question was processed.
	    """
	    if not profile:
	        return "❌ Please login with HuggingFace first!", None

	    print(f"🚀 Starting evaluation for user: {profile.username}")

	    # Collected outside the try block so rows gathered before a failure can
	    # still be shown to the user.
	    results = []

	    def _results_table():
	        """Return the rows gathered so far as a DataFrame, or None if empty."""
	        return pd.DataFrame(results) if results else None

	    try:
	        # Initialize agent
	        agent = BasicAgent()
	        print("✅ Agent initialized successfully")

	        # Get questions
	        questions_url = f"{DEFAULT_API_URL}/questions"
	        print(f"📥 Fetching questions from: {questions_url}")

	        response = requests.get(questions_url, timeout=30)
	        # Surface HTTP errors instead of parsing an error page as questions.
	        response.raise_for_status()
	        questions = response.json()
	        if not questions:
	            return "❌ Evaluation failed: no questions were returned.", None
	        print(f"📋 Got {len(questions)} questions")

	        # Process questions
	        answers = []
	        pause_seconds = 5  # delay after each answered question

	        for position, q in enumerate(questions, start=1):
	            task_id = q.get("task_id")
	            question_text = q.get("question")

	            print(f"\n🔄 Question {position}/{len(questions)}: {task_id}")

	            # An entry without an id or text cannot be answered or submitted;
	            # skipping it keeps one bad entry from aborting the whole run.
	            if not task_id or question_text is None:
	                print(f"⚠️ Skipping malformed question entry: {q}")
	                continue

	            preview = question_text[:100] + "..."
	            try:
	                answer = agent(question_text)
	            except Exception as e:
	                print(f"❌ Error on question {task_id}: {e}")
	                results.append({
	                    "Task ID": task_id,
	                    "Question": preview,
	                    "Answer": f"ERROR: {e}"
	                })
	                continue

	            answers.append({"task_id": task_id, "submitted_answer": answer})
	            results.append({
	                "Task ID": task_id,
	                "Question": preview,
	                "Answer": answer
	            })
	            time.sleep(pause_seconds)

	        if not answers:
	            return (
	                "❌ Evaluation failed: the agent produced no answers to submit.",
	                _results_table(),
	            )

	        # Submit answers
	        space_id = os.getenv("SPACE_ID", "Supan23/gaia-agent")
	        submit_data = {
	            "username": profile.username,
	            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
	            "answers": answers
	        }

	        submit_url = f"{DEFAULT_API_URL}/submit"
	        print(f"📤 Submitting to: {submit_url}")

	        response = requests.post(submit_url, json=submit_data, timeout=60)
	        # A rejected submission must not be reported as a completed evaluation.
	        response.raise_for_status()
	        result = response.json()

	        status = f"""🎉 EVALUATION COMPLETE!

	👤 User: {result.get('username')}
	📊 Score: {result.get('score', 0)}%
	✅ Correct: {result.get('correct_count', 0)}/{result.get('total_attempted', 0)}
	💬 Message: {result.get('message', 'No message')}

	{'🏆 CERTIFICATE ELIGIBLE!' if result.get('score', 0) >= 30 else '📈 Need 30% for certificate'}
	"""

	        return status, pd.DataFrame(results)

	    except Exception as e:
	        error_msg = f"❌ Evaluation failed: {str(e)}"
	        print(error_msg)
	        return error_msg, _results_table()

	# Build the page: heading, login button, a trigger button and the two
	# output widgets that receive run_evaluation's (status, table) result.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🎓 GAIA Agent - Certificate Test")
	    gr.Markdown("Click Login, then click Run Test to get your certificate!")

	    gr.LoginButton()

	    run_btn = gr.Button(value="🚀 Run Certificate Test", variant="primary")
	    status = gr.Textbox(label="📊 Results", lines=10)
	    table = gr.DataFrame(label="📋 Questions & Answers")

	    # No explicit inputs: the handler only fills the two output widgets.
	    run_btn.click(fn=run_evaluation, outputs=[status, table])

	# Start the app only when this file is executed directly.
	if __name__ == "__main__":
	    demo.launch()