Final_Assignment_Template

Running

Refactor and add new debugging scripts; update question fetching logic

3f4fc54 about 2 months ago

1.25 kB

	import os
	import requests
	from langchain_core.messages import HumanMessage
	from agent import build_graph
	from huggingface_hub import hf_hub_download
	import pyarrow.parquet as pq
	from dotenv import load_dotenv

	# Load variables from a .env file; override=True lets values from the file
	# replace ones already present in the process environment.
	load_dotenv(override=True)

	# Base URL of the service queried for questions below.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	# Initialize agent
	graph = build_graph()

	# Fetch 1 question.
	# The timeout keeps the script from hanging forever on a stalled
	# connection, and raise_for_status() reports an HTTP error as such instead
	# of letting it show up later as a JSON-decoding or indexing failure.
	resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
	resp.raise_for_status()
	questions = resp.json()[:1]
	if not questions:
	    # Fail here with a clear message rather than with an IndexError when
	    # the first question is accessed further down.
	    raise SystemExit("No questions returned by the questions endpoint.")

	# Load ground truth
	# The access token is read from HF_TOKEN, falling back to
	# HUGGINGFACEHUB_API_TOKEN; it is None when neither is set.
	token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
	path = hf_hub_download(
	    repo_id='gaia-benchmark/GAIA',
	    filename='2023/validation/metadata.parquet',
	    repo_type='dataset',
	    token=token,
	)
	# Only these two columns are used, so skip decoding the rest of the table.
	df = pq.read_table(path, columns=['task_id', 'Final answer']).to_pandas()
	# Lookup table: task_id -> value of the 'Final answer' column.
	answer_map = dict(zip(df['task_id'], df['Final answer']))

	# Test: run the agent on the fetched question and compare its reply with
	# the stored answer for the same task_id.
	q = questions[0]
	task_id = q['task_id']
	question = q['question']
	# "NOT FOUND" marks a task_id that is absent from the ground-truth table.
	ground_truth = answer_map.get(task_id, "NOT FOUND")

	print(f"Question: {question[:100]}...")
	print(f"Ground Truth: {ground_truth}")
	print("-" * 40)

	result = graph.invoke({"messages": [HumanMessage(content=question)]})
	answer = result['messages'][-1].content
	# Message content is not guaranteed to be a plain string: it may be a list
	# of strings and/or content-block dicts. Flatten it to text so the
	# .strip() below cannot fail with AttributeError.
	if not isinstance(answer, str):
	    answer = "".join(
	        part if isinstance(part, str) else str(part.get("text", ""))
	        for part in answer
	        if isinstance(part, (str, dict))
	    )
	print(f"Agent Answer: {answer}")
	print("-" * 40)

	# Comparison ignores case and surrounding whitespace; ground_truth is passed
	# through str() because the stored value may not be a string.
	is_correct = answer.strip().lower() == str(ground_truth).strip().lower()
	print(f"Correct: {is_correct}")