Spaces:

schoolkithub
/

GAIA_AGE

Sleeping

GAIA_AGE / app.py

ghost

Updated GAIA agent for submission

945d0d0 7 months ago

4.18 kB

	import gradio as gr
	import json
	import os
	from datetime import datetime
	from agent import GAIAAgent
	from evaluate import evaluate_agent, create_sample_dataset
	import traceback

	def run_evaluation():
	    """Run the GAIA evaluation and build a Markdown report for the UI.

	    Creates a ``GAIAAgent``, records what ``agent.test_grok()`` returns,
	    scores the agent with ``evaluate_agent(dataset_path=None, max_tasks=10)``
	    and, if ``submission.jsonl`` exists in the working directory, embeds its
	    first 500 characters in the report.

	    Returns:
	        A ``(report, score)`` tuple. ``report`` is Markdown text and ``score``
	        is the value returned by ``evaluate_agent`` (shown as a percentage).
	        If any step raises, the traceback is printed to stdout and the tuple
	        is ``(error_report, 0.0)`` instead; the exception is not re-raised.
	    """
	    preview_limit = 500  # characters of submission.jsonl shown in the report

	    try:
	        print("Starting GAIA Agent Evaluation...")
	        print("=" * 50)

	        agent = GAIAAgent()

	        # Probe the API first so the report shows connectivity status.
	        print("Testing xAI API connection...")
	        test_response = agent.test_grok()
	        print(f"API Test Response: {test_response}")

	        # dataset_path=None: run on the sample tasks (no full GAIA dataset).
	        print("\nRunning evaluation on sample tasks...")
	        score = evaluate_agent(dataset_path=None, max_tasks=10)

	        # Read one character more than the preview needs: enough to know
	        # whether to append "..." without loading the whole file.  The
	        # encoding is explicit so decoding does not depend on the locale, and
	        # undecodable bytes are replaced because this is only a preview.
	        # NOTE(review): presumably evaluate_agent writes this file - confirm.
	        try:
	            with open(
	                "submission.jsonl", "r", encoding="utf-8", errors="replace"
	            ) as f:
	                head = f.read(preview_limit + 1)
	        except FileNotFoundError:
	            head = ""
	        preview = head[:preview_limit]
	        ellipsis = "..." if len(head) > preview_limit else ""

	        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

	        passed = score >= 30
	        certificate = (
	            "✅ ACHIEVED (≥30%)" if passed else "❌ NOT ACHIEVED (<30%)"
	        )
	        next_steps = (
	            "🎉 Congratulations! You can now claim your Certificate of Excellence!"
	            if passed
	            else "💪 Keep improving your agent to reach the 30% threshold."
	        )

	        # Assemble the Markdown from explicit lines so that no line carries
	        # source-code indentation (indented lines can render as code blocks,
	        # and the multi-line preview would defeat common-indent stripping).
	        report = "\n".join(
	            [
	                "",
	                "# GAIA Agent Evaluation Results",
	                "",
	                f"Timestamp: {timestamp}",
	                f"Final Score: {score:.2f}%",
	                f"Certificate Status: {certificate}",
	                "",
	                "## API Connection Status",
	                f"{test_response}",
	                "",
	                "## Submission File Preview",
	                "```json",
	                f"{preview}{ellipsis}",
	                "```",
	                "",
	                "## Next Steps",
	                next_steps,
	                "",
	            ]
	        )
	        return report, score

	    except Exception as e:
	        # Top-level UI boundary: report the failure instead of crashing the
	        # handler, and also print the traceback so the "check the logs" hint
	        # in the report actually has something to point at.
	        trace = traceback.format_exc()
	        print(trace)

	        error_report = "\n".join(
	            [
	                "",
	                "# Evaluation Error",
	                "",
	                f"Error: {e}",
	                "",
	                "Traceback:",
	                "```",
	                trace,
	                "```",
	                "",
	                "Please check the logs and fix any issues before retrying.",
	                "",
	            ]
	        )
	        return error_report, 0.0

	def create_interface():
	    """Assemble the Gradio Blocks app and return it without launching.

	    The page consists of an introductory Markdown panel, a single button that
	    triggers ``run_evaluation``, a row with a Markdown report area next to a
	    read-only numeric score field, and a closing "about / troubleshooting"
	    Markdown panel.

	    Returns:
	        The ``gr.Blocks`` instance; the caller is responsible for ``launch()``.
	    """
	    # Static page texts, kept verbatim.
	    intro_markdown = """
	# 🤖 GAIA Agent Evaluation

	This is your GAIA benchmark agent for the Hugging Face Agents Course Certificate of Excellence.

	Goal: Achieve ≥30% score on GAIA benchmark tasks

	Click the button below to run the evaluation and submit your answers.

	⚠️ Note: This may take several minutes to complete. Please be patient.
	"""
	    about_markdown = """
	---

	## About This Agent

	- API: xAI Grok for reasoning
	- Tools: Web search, file handling, math calculations
	- Fallbacks: Local knowledge for common questions
	- Target: 30% accuracy for certificate eligibility

	## Troubleshooting

	If you encounter issues:
	1. Check the container logs in the "Logs" tab
	2. Verify API credentials and internet connectivity
	3. Ensure all dependencies are installed

	Good luck! 🍀
	"""

	    soft_theme = gr.themes.Soft()
	    app = gr.Blocks(title="GAIA Agent Evaluation", theme=soft_theme)

	    with app:
	        gr.Markdown(intro_markdown)

	        with gr.Row():
	            start_button = gr.Button(
	                "🚀 Run Evaluation & Submit All Answers",
	                variant="primary",
	                size="lg",
	            )

	        with gr.Row():
	            # Left column: textual report.
	            with gr.Column():
	                gr.Markdown("## Run Status / Submission Result")
	                report_view = gr.Markdown(
	                    "Click the button above to start evaluation..."
	                )

	            # Right column: numeric score, display only.
	            with gr.Column():
	                gr.Markdown("## Score")
	                score_box = gr.Number(
	                    label="Final Score (%)",
	                    value=0.0,
	                    interactive=False,
	                )

	        # run_evaluation takes no inputs and returns (report, score), which
	        # map onto the two output components in that order.
	        start_button.click(
	            fn=run_evaluation,
	            inputs=[],
	            outputs=[report_view, score_box],
	            show_progress=True,
	        )

	        gr.Markdown(about_markdown)

	    return app

	if __name__ == "__main__":
	    # Script entry point: build the UI, then serve it on every network
	    # interface at port 7860, surfacing errors in the UI and hiding the
	    # API page.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "show_error": True,
	        "show_api": False,
	    }
	    demo = create_interface()
	    demo.launch(**launch_options)