Spaces:
Restarting
Restarting
| import os | |
| import requests | |
| import pandas as pd | |
| import pyarrow.parquet as pq | |
| import json | |
| import time | |
| from langchain_core.messages import HumanMessage | |
| from agent import build_graph | |
| from huggingface_hub import hf_hub_download | |
| from dotenv import load_dotenv | |
# Load environment variables from .env; override=True lets .env values win
# over anything already set in the process environment.
load_dotenv(override=True)
# Base URL of the HF Agents-course scoring service (serves the /questions endpoint).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class BasicAgent:
    """Thin callable wrapper around the compiled LangGraph agent graph.

    Construction builds the graph once; each call runs one question
    through it and returns the final message's text content.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        """Run *question* through the graph and return the last message's content."""
        state = self.graph.invoke({"messages": [HumanMessage(content=question)]})
        return state["messages"][-1].content
def file_extract(local_file_path, task_id):
    """Resolve a GAIA attachment filename to a local file path via the HF Hub.

    GAIA stores task attachments under split-specific folders; each candidate
    prefix is probed in turn and the first successful download wins.
    Returns the local path, or None when the name is falsy or nothing resolves.
    (task_id is currently unused but kept for interface stability.)
    """
    if not local_file_path:
        return None
    hub_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
    for folder in ("2023/validation/", "2023/test/", "2023/train/", ""):
        try:
            return hf_hub_download(
                repo_id="gaia-benchmark/GAIA",
                filename=f"{folder}{local_file_path}",
                repo_type="dataset",
                token=hub_token,
            )
        except Exception:
            # This prefix didn't match (or download failed) — try the next one.
            continue
    return None
def _fetch_questions():
    """Download the evaluation question list from the scoring service.

    Raises requests.HTTPError on a non-2xx response instead of letting
    response.json() fail confusingly on an HTML/error body.
    """
    print("Fetching questions...")
    response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
    response.raise_for_status()  # fail loudly on HTTP errors
    questions_data = response.json()
    print(f"Fetched {len(questions_data)} questions")
    return questions_data


def _load_answer_map():
    """Load GAIA validation ground truth as a {task_id: final_answer} dict."""
    print("Loading ground truth...")
    token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
    path = hf_hub_download(
        repo_id='gaia-benchmark/GAIA',
        filename='2023/validation/metadata.parquet',
        repo_type='dataset',
        token=token,
    )
    df = pq.read_table(path).to_pandas()
    return dict(zip(df['task_id'], df['Final answer']))


def main():
    """Run the agent over every GAIA question, score it, and save results.

    Writes gaia_results.json and gaia_results.csv to the working directory.
    """
    # 1-2. Fetch questions and load ground truth.
    questions_data = _fetch_questions()
    answer_map = _load_answer_map()

    # 3. Initialize agent.
    agent = BasicAgent()

    # 4. Run on all questions.
    results = []
    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
        file_name = item.get("file_name")
        if not task_id or question_text is None:
            # Malformed entry — skip rather than crash the whole run.
            continue
        # Attach a local copy of any referenced file so the agent can read it.
        if file_name:
            resolved_path = file_extract(file_name, task_id)
            if resolved_path:
                question_text += f"\n\n[Attached File Local Path: {resolved_path}]"
        print(f"\n[{i+1}/{len(questions_data)}] Task: {task_id[:20]}...")
        try:
            answer = agent(question_text)
        except Exception as e:
            # Per-question failures are recorded as answers so the run continues.
            answer = f"ERROR: {e}"
        ground_truth = answer_map.get(task_id, "NOT FOUND")
        # Exact match, case-insensitive and whitespace-trimmed.
        is_correct = str(answer).strip().lower() == str(ground_truth).strip().lower()
        results.append({
            "task_id": task_id,
            "question": item.get("question"),
            "submitted_answer": answer,
            "ground_truth": ground_truth,
            "correct": is_correct
        })
        status = "✅" if is_correct else "❌"
        print(f" {status} Submitted: {str(answer)[:40]}")
        print(f" Ground: {str(ground_truth)[:40]}")
        time.sleep(1.5)  # gentle pacing between agent runs

    # 5. Calculate score.
    correct_count = sum(1 for r in results if r["correct"])
    total = len(results)
    score_pct = correct_count / total * 100 if total > 0 else 0
    print("\n" + "="*60)
    print(f"FINAL SCORE: {correct_count}/{total} = {score_pct:.0f}%")
    print("="*60)

    # 6. Save results (JSON with score summary, plus a flat CSV).
    output = {"score": score_pct, "correct": correct_count, "total": total, "results": results}
    with open("gaia_results.json", "w") as f:
        json.dump(output, f, indent=2)
    pd.DataFrame(results).to_csv("gaia_results.csv", index=False)
    print("Results saved!")
| if __name__ == "__main__": | |
| main() | |