"""Spot-check the agent graph against GAIA ground-truth answers.

Fetches the question list from the scoring API, downloads the GAIA
validation metadata from the Hugging Face Hub, runs the agent graph on a
few selected questions, and prints each (truncated) answer next to the
expected final answer.
"""

import os

import pyarrow.parquet as pq
import requests
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
from langchain_core.messages import HumanMessage

from agent import build_graph

# Load variables from a local .env file, overriding any already set in the
# process environment, before anything below reads credentials.
load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Without a timeout, requests waits forever on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30
# Zero-based positions in the question list: Q1, Q5, Q7.
QUESTION_INDICES = (0, 4, 6)

graph = build_graph()

resp = requests.get(
    f"{DEFAULT_API_URL}/questions", timeout=REQUEST_TIMEOUT_SECONDS
)
# Fail here with a clear HTTP error instead of trying to parse an error page.
resp.raise_for_status()
questions = resp.json()

# Either environment variable may hold the Hub token; if neither is set,
# None is passed through to hf_hub_download.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename='2023/validation/metadata.parquet',
    repo_type='dataset',
    token=token,
)
df = pq.read_table(path).to_pandas()
# Map each task_id to its expected final answer.
answer_map = dict(zip(df['task_id'], df['Final answer']))

# Check Q1, Q5, Q7
for i in QUESTION_INDICES:
    if i >= len(questions):
        # The API returned fewer questions than expected; skip rather than
        # crash with an IndexError.
        print(f"\n=== Q{i+1} ===")
        print(f"Skipped: only {len(questions)} questions available")
        continue

    q = questions[i]
    task_id = q['task_id']
    question = q['question']
    ground_truth = answer_map.get(task_id, "NOT FOUND")

    result = graph.invoke({"messages": [HumanMessage(content=question)]})
    # The last message in the returned state is taken as the agent's answer.
    answer = result['messages'][-1].content

    print(f"\n=== Q{i+1} ===")
    print(f"Q: {question[:80]}...")
    print(f"GT: {ground_truth}")
    print(f"Ans: {answer[:50]}")