# GAIA benchmark evaluation script (Ollama + smolagents).
# Refactored run_gaia_evaluation to integrate LiteLLMModel and update agent initialization.
import os
import requests
from dotenv import load_dotenv
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel
# Load environment variables (e.g. HF_USERNAME) from a local .env file, if present.
load_dotenv()
# Scoring service for the Hugging Face Agents course (unit 4 / GAIA benchmark).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# System prompt for the agent. The GAIA scorer uses exact string matching, so the
# formatting rules below (no units, no articles, lowercase strings, expanded
# abbreviations) directly determine whether an answer is counted as correct.
# NOTE: this string is runtime behavior — do not reword it casually.
INSTRUCTIONS = """You are a general AI assistant. I will ask you a question. Report your thoughts, and then provide your final answer.
CRITICAL FORMATTING RULES:
- Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string
- Be extremely precise with spelling and formatting - the evaluation uses exact matching
- For strings: no extra spaces, no punctuation unless part of the answer, lowercase
- For numbers: just the number, no units, no commas, no currency symbols
- Provide ONLY the answer as your final response, nothing else
- Expand abbreviations like 'St.' to 'Saint' in city names
You have access to a web search tool to help you find accurate information. Use it when you need to look up facts."""
def run_gaia_evaluation():
    """Fetch GAIA benchmark questions, answer them with a local Ollama-backed
    agent, and submit the answers to the course scoring service.

    Reads HF_USERNAME from the environment (loaded via dotenv). Requires a
    local Ollama server at http://localhost:11434 serving the gemma3 model.
    Prints progress and the final score; returns None.
    """
    print("🚀 GAIA Benchmark Evaluation with Ollama")
    print("=" * 60)
    username = os.getenv("HF_USERNAME")
    if not username:
        print("❌ Please set HF_USERNAME environment variable")
        return
    print(f"👤 User: {username}")
    model = LiteLLMModel(
        model_id="ollama_chat/gemma3",
        api_base="http://localhost:11434",
        num_ctx=8192,
        temperature=0.1,  # Low temperature for more deterministic answers
    )
    agent = CodeAgent(
        tools=[DuckDuckGoSearchTool()],
        model=model,
        instructions=INSTRUCTIONS,
        max_steps=10,
    )
    # Fetch the question set; abort early if the scoring service is unreachable.
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        resp.raise_for_status()
        data = resp.json()
        # The endpoint may return either a bare list or {"questions": [...]}.
        questions = data if isinstance(data, list) else data.get("questions", [])
        print(f"📋 Loaded {len(questions)} questions")
    except requests.RequestException as e:
        print(f"❌ Error fetching questions: {e}")
        return
    if not questions:
        # Nothing to answer — don't submit an empty payload.
        print("❌ No questions received; nothing to submit.")
        return
    results = []
    for i, q in enumerate(questions):
        task_id = q.get("task_id")
        text = q.get("question", "")
        if task_id is None:
            # Skip malformed entries rather than crashing mid-run on KeyError.
            print(f"⚠️ Skipping question {i+1}: missing task_id")
            continue
        print(f"\n❓ Question {i+1}: {text}")
        # One failing question (model timeout, tool error, etc.) must not
        # abort the whole run and lose every answer collected so far.
        try:
            result = agent.run(text, reset=True)
            result_str = str(result).strip()
        except Exception as e:
            result_str = ""
            print(f"⚠️ Agent failed on question {i+1}: {e}")
        # Take the last line as the answer (since agent should provide only the answer)
        out = result_str.splitlines()[-1] if result_str else "AGENT ERROR: No response."
        if out.startswith("{"):
            # A raw JSON/dict dump means the agent never produced a final answer.
            out = "AGENT ERROR: No final answer."
        out = out.strip().rstrip(".")
        results.append({"task_id": task_id, "submitted_answer": out})
        print(f"✅ Answer: '{out}'")
        print(f"📝 Preview: {result_str[:200]}...")
    # Submit answers automatically
    payload = {
        "username": username,
        "agent_code": "ollama-gemma3-with-tools",
        "answers": results,
    }
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
        post.raise_for_status()
        res = post.json()
        print("\n" + "=" * 60)
        print("🏆 GAIA BENCHMARK RESULTS")
        print("=" * 60)
        print(f"👤 User: {res.get('username', username)}")
        # Field names vary between service versions; try both spellings.
        print(f"📊 Overall Score: {res.get('score', res.get('overall_score', 'N/A'))}%")
        print(f"✅ Correct: {res.get('correct_count', res.get('num_correct', 'N/A'))}/{len(results)}")
        print(f"💬 Message: {res.get('message', 'N/A')}")
        print("=" * 60)
    except requests.RequestException as e:
        print(f"❌ Error submitting: {e}")
        # Fall back to a local tally of answers that didn't error out.
        done = sum(1 for r in results if not r["submitted_answer"].startswith("AGENT ERROR"))
        print(f"Completed locally: {done}/{len(results)}")
# Script entry point: run the full evaluation when executed directly.
if __name__ == "__main__":
    run_gaia_evaluation()