import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Seconds to wait on the scoring API; requests never times out on its own.
REQUEST_TIMEOUT = 30


def message_text(content):
    """Flatten a message ``content`` value into plain text.

    ``content`` is returned unchanged when it is already a string. When it
    is a list, string items and the ``"text"`` value of dict items are
    concatenated in order; dict items without a string ``"text"`` value are
    skipped. Anything else is converted with ``str()``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict) and isinstance(item.get("text"), str):
                parts.append(item["text"])
        return "".join(parts)
    return str(content)


def answers_match(answer, ground_truth):
    """Return True when ``answer`` equals ``ground_truth``.

    Both sides are compared as text, ignoring case and surrounding
    whitespace. ``answer`` may be a string or a list of content parts (see
    ``message_text``); ``ground_truth`` is converted with ``str()``.
    """
    candidate = message_text(answer).strip().lower()
    expected = str(ground_truth).strip().lower()
    return candidate == expected


def fetch_first_question(api_url=DEFAULT_API_URL, timeout=REQUEST_TIMEOUT):
    """Fetch the question list from ``api_url`` and return its first entry.

    Raises:
        requests.HTTPError: if the API responds with an error status.
        RuntimeError: if the API returns no questions.
    """
    resp = requests.get(f"{api_url}/questions", timeout=timeout)
    # Fail here rather than trying to index into an error payload below.
    resp.raise_for_status()
    questions = resp.json()
    if not questions:
        raise RuntimeError(f"No questions returned by {api_url}/questions")
    return questions[0]


def load_answer_map(token=None):
    """Download the GAIA validation metadata and map task_id -> final answer.

    ``token`` is passed straight through to ``hf_hub_download``.
    """
    path = hf_hub_download(
        repo_id='gaia-benchmark/GAIA',
        filename='2023/validation/metadata.parquet',
        repo_type='dataset',
        token=token,
    )
    df = pq.read_table(path).to_pandas()
    return dict(zip(df['task_id'], df['Final answer']))


def main():
    """Run the agent on one question and print it next to the ground truth."""
    # Load .env before the agent is built, matching the original order.
    load_dotenv(override=True)

    # Initialize agent
    graph = build_graph()

    # Fetch 1 question
    q = fetch_first_question()
    task_id = q['task_id']
    question = q['question']

    # Load ground truth
    token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
    answer_map = load_answer_map(token)
    has_truth = task_id in answer_map
    ground_truth = answer_map[task_id] if has_truth else "NOT FOUND"

    # Test
    print(f"Question: {question[:100]}...")
    print(f"Ground Truth: {ground_truth}")
    print("-" * 40)

    result = graph.invoke({"messages": [HumanMessage(content=question)]})
    # The last message's content may not be a plain string; normalize it.
    answer = message_text(result['messages'][-1].content)
    print(f"Agent Answer: {answer}")
    print("-" * 40)

    # Without a ground truth there is nothing to be correct against, so the
    # "NOT FOUND" placeholder is never compared with the agent's answer.
    is_correct = has_truth and answers_match(answer, ground_truth)
    print(f"Correct: {is_correct}")


if __name__ == "__main__":
    main()