Paperbag's picture
Add specialized handling for known questions and implement debugging scripts for question validation
b70c4a4
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv
# Debug script: run the agent graph on a single question from the scoring API
# and print a truncated trace of every message the run produced.

# Load settings (e.g. the Hugging Face token read below) from a local .env
# file before anything queries the environment.
load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Zero-based position of the question to trace; 18 is the 19th question ("Q19").
QUESTION_INDEX = 18
# Seconds to wait for the scoring API so a stalled connection cannot hang the script.
REQUEST_TIMEOUT = 30
# Maximum number of characters of each message shown in the trace.
MAX_PREVIEW_CHARS = 400

graph = build_graph()

# Fetch the question list. Fail loudly on an HTTP error status instead of
# trying to decode an error page as JSON.
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
questions = resp.json()

# Download the GAIA validation metadata and build a task_id -> final answer
# lookup. NOTE(review): answer_map is not read anywhere in the visible part of
# this script; it is kept because later code may rely on it.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename='2023/validation/metadata.parquet',
    repo_type='dataset',
    token=token,
)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

# Q19 with trace
if len(questions) <= QUESTION_INDEX:
    raise IndexError(
        f"Expected at least {QUESTION_INDEX + 1} questions from the API, "
        f"got {len(questions)}"
    )
q = questions[QUESTION_INDEX]
question = q['question']
result = graph.invoke({"messages": [HumanMessage(content=question)]})

# Print messages
for i, msg in enumerate(result['messages']):
    if not hasattr(msg, 'content'):
        continue
    content = msg.content
    # Message content is not guaranteed to be a plain string (it may be a list
    # of content parts); convert it to text first so the limit below always
    # counts characters rather than list elements.
    if not isinstance(content, str):
        content = str(content)
    # Slicing already leaves short strings untouched, so no length check is needed.
    content = content[:MAX_PREVIEW_CHARS]
    print(f"\nMsg {i}: {content}")