| | import os |
| | import tempfile |
| | import gradio as gr |
| | import pandas as pd |
| | import traceback |
| | from core_agent import GAIAAgent |
| | from api_integration import GAIAApiClient |
| |
|
| | |
| | DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" |
| |
|
def save_task_file(file_content, task_id):
    """
    Persist downloaded task content to the system temp directory.

    Args:
        file_content: Raw bytes of the task attachment (falsy -> nothing saved).
        task_id: Identifier used to derive a stable file name.

    Returns:
        The path of the written file, or None when there was no content.
    """
    if not file_content:
        return None

    # Name the file after the task so repeated runs simply overwrite it.
    target = os.path.join(tempfile.gettempdir(), f"gaia_task_{task_id}.txt")

    with open(target, 'wb') as handle:
        handle.write(file_content)

    print(f"File saved to {target}")
    return target
| |
|
def get_agent_configuration():
    """
    Build the GAIAAgent keyword configuration from environment variables.

    Starts from defaults, applies xAI credentials when XAI_API_KEY is set
    (which also switches the default model), then lets individual AGENT_*
    variables override any field.

    Returns:
        dict: keyword arguments for GAIAAgent(**config).
    """
    # Baseline defaults; any of these may be overridden below.
    config = {
        "model_type": "OpenAIServerModel",
        "model_id": "gpt-4o",
        "temperature": 0.2,
        "executor_type": "local",
        "verbose": False,
        "provider": "hf-inference",
        "timeout": 120,
    }

    # xAI support: the API key switches credentials and the default model.
    # AGENT_MODEL_ID below can still override the model choice.
    if xai_api_key := os.getenv("XAI_API_KEY"):
        config["api_key"] = xai_api_key
        if xai_api_base := os.getenv("XAI_API_BASE"):
            config["api_base"] = xai_api_base
        config["model_id"] = "mixtral-8x7b-32768"

    # Generic overrides. Each variable is read once (walrus) instead of the
    # original double os.getenv() lookup per setting.
    if model_type := os.getenv("AGENT_MODEL_TYPE"):
        config["model_type"] = model_type

    if model_id := os.getenv("AGENT_MODEL_ID"):
        config["model_id"] = model_id

    if temperature := os.getenv("AGENT_TEMPERATURE"):
        config["temperature"] = float(temperature)

    if executor_type := os.getenv("AGENT_EXECUTOR_TYPE"):
        config["executor_type"] = executor_type

    # Deliberately `is not None` (not truthiness): an empty string means
    # "explicitly set, but not 'true'", i.e. verbose stays False.
    if (verbose := os.getenv("AGENT_VERBOSE")) is not None:
        config["verbose"] = verbose.lower() == "true"

    if api_base := os.getenv("AGENT_API_BASE"):
        config["api_base"] = api_base

    # InferenceClientModel-specific settings.
    if provider := os.getenv("AGENT_PROVIDER"):
        config["provider"] = provider

    if timeout := os.getenv("AGENT_TIMEOUT"):
        config["timeout"] = int(timeout)

    return config
| |
|
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all GAIA questions, run the GAIAAgent on each, submit the
    answers to the scoring server, and report the results.

    Args:
        profile: OAuth profile auto-injected by Gradio's LoginButton,
            or None when the user is not logged in.

    Returns:
        tuple: (status message string, pandas DataFrame of per-question
        results or None when nothing was run).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    print(f"User logged in: {username}")

    # Link to this Space's code so the scoring server can audit the agent.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    api_client = GAIAApiClient(DEFAULT_API_URL)

    # --- Initialize the agent from the environment-derived config ---
    try:
        agent_config = get_agent_configuration()
        print(f"Using agent configuration: {agent_config}")

        agent = GAIAAgent(**agent_config)
        print("Agent initialized successfully")
    except Exception as e:
        error_details = traceback.format_exc()
        print(f"Error initializing agent: {e}\n{error_details}")
        return f"Error initializing agent: {e}", None

    # --- Fetch the question set ---
    try:
        questions_data = api_client.get_questions()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        error_details = traceback.format_exc()
        print(f"Error fetching questions: {e}\n{error_details}")
        return f"Error fetching questions: {e}", None

    # --- Run the agent on every question ---
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    total_questions = len(questions_data)
    completed = 0  # questions attempted (includes those that later fail)
    failed = 0     # questions where the agent raised an exception

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            completed += 1
            print(f"Processing question {completed}/{total_questions}: Task ID {task_id}")

            # Some tasks ship an attachment. A missing file is expected for
            # many tasks, so download errors are logged and ignored.
            file_path = None
            try:
                file_content = api_client.get_file(task_id)
                print(f"Downloaded file for task {task_id}")
                file_path = save_task_file(file_content, task_id)
            except Exception as file_e:
                print(f"No file found for task {task_id} or error: {file_e}")

            submitted_answer = agent.answer_question(question_text, file_path)

            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer
            })
        except Exception as e:
            # Submit an explicit error marker rather than dropping the task,
            # so the scoreboard reflects every question attempted.
            failed += 1
            error_details = traceback.format_exc()
            print(f"Error running agent on task {task_id}: {e}\n{error_details}")

            error_msg = f"AGENT ERROR: {e}"
            answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": error_msg
            })

    print(f"\nProcessing complete: {completed} questions processed, {failed} failures")

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # --- Submit answers ---
    # (A dead `submission_data` dict was removed here: submit_answers takes
    # the username, agent_code and payload as separate arguments.)
    print(f"Submitting {len(answers_payload)} answers for username '{username}'...")

    try:
        result_data = api_client.submit_answers(
            username.strip(),
            agent_code,
            answers_payload
        )

        correct_count = result_data.get('correct_count', 0)
        total_attempted = result_data.get('total_attempted', len(answers_payload))
        # Guard against division by zero if the server reports 0 attempts.
        success_rate = (correct_count / total_attempted) * 100 if total_attempted > 0 else 0

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({correct_count}/{total_attempted} correct, {success_rate:.1f}% success rate)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )

        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        error_details = traceback.format_exc()
        status_message = f"Submission Failed: {e}\n{error_details}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
| |
|
| | |
# Gradio UI: a login button plus a single action that runs the whole
# fetch -> answer -> submit pipeline and displays the outcome.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        **Configuration:**

        You can configure the agent by setting these environment variables:
        - `AGENT_MODEL_TYPE`: Model type (HfApiModel, InferenceClientModel, LiteLLMModel, OpenAIServerModel)
        - `AGENT_MODEL_ID`: Model ID
        - `AGENT_TEMPERATURE`: Temperature for generation (0.0-1.0)
        - `AGENT_EXECUTOR_TYPE`: Type of executor ('local' or 'e2b')
        - `AGENT_VERBOSE`: Enable verbose logging (true/false)
        - `AGENT_API_BASE`: Base URL for API calls (for OpenAIServerModel)

        **xAI Support:**
        - `XAI_API_KEY`: Your xAI API key
        - `XAI_API_BASE`: Base URL for xAI API (default: https://api.groq.com/openai/v1)
        - When using xAI, set AGENT_MODEL_TYPE=OpenAIServerModel and AGENT_MODEL_ID=mixtral-8x7b-32768

        **InferenceClientModel specific settings:**
        - `AGENT_PROVIDER`: Provider for InferenceClientModel (e.g., "hf-inference")
        - `AGENT_TIMEOUT`: Timeout in seconds for API calls
        """
    )

    # Hugging Face OAuth login; Gradio auto-injects the resulting
    # gr.OAuthProfile into run_and_submit_all's typed `profile` parameter.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Outputs: a free-text status box and a table of per-question answers.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
| |
|
if __name__ == "__main__":
    # Banner makes app restarts easy to spot in the Space logs.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # Echo the effective agent configuration before serving requests.
    startup_config = get_agent_configuration()
    print(f"Agent configuration: {startup_config}")

    # Local debug server; sharing is disabled (the Space hosts the app).
    demo.launch(debug=True, share=False)
| |
|