Final_Assignment_Template / debug_check.py
Paperbag's picture
Add specialized handling for known questions and implement debugging scripts for question validation
b70c4a4
raw
history blame
1.14 kB
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv
load_dotenv(override=True)
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
graph = build_graph()
resp = requests.get(f"{DEFAULT_API_URL}/questions")
questions = resp.json()
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))
# Check Q1, Q5, Q7
for i in [0, 4, 6]:
q = questions[i]
task_id = q['task_id']
question = q['question']
ground_truth = answer_map.get(task_id, "NOT FOUND")
result = graph.invoke({"messages": [HumanMessage(content=question)]})
answer = result['messages'][-1].content
print(f"\n=== Q{i+1} ===")
print(f"Q: {question[:80]}...")
print(f"GT: {ground_truth}")
print(f"Ans: {answer[:50]}")