# Paperbag's picture
# Add specialized handling for known questions and implement debugging scripts for question validation
# b70c4a4
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv
# Load settings from a .env file up front so the os.getenv token lookups in
# this script can see them. NOTE(review): override=True presumably lets .env
# values replace variables already set in the environment — confirm against
# the python-dotenv documentation.
load_dotenv(override=True)
# Base URL of the scoring service; this script reads its /questions endpoint.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def file_extract(local_file_path, task_id):
    """Find an attachment in the ``gaia-benchmark/GAIA`` dataset repo.

    The file name is tried under each known folder prefix in turn
    (validation, test, train, then the repo root). The first download that
    succeeds wins.

    Args:
        local_file_path: File name of the attachment as given by the
            question record. A falsy value (``None``, ``""``) short-circuits.
        task_id: Accepted for the caller's convenience; not used here.

    Returns:
        The path returned by ``hf_hub_download`` for the first prefix that
        works, or ``None`` when the name is falsy or every attempt fails.
    """
    if not local_file_path:
        return None

    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

    # Probe order matters: validation first, repo root (empty prefix) last.
    for folder in ("2023/validation/", "2023/test/", "2023/train/", ""):
        try:
            downloaded = hf_hub_download(
                repo_id="gaia-benchmark/GAIA",
                filename=f"{folder}{local_file_path}",
                repo_type="dataset",
                token=hf_token,
            )
        except Exception:
            # Best-effort lookup: any failure just means "try the next folder".
            continue
        else:
            return downloaded

    return None
# Build the agent graph once at start-up.
graph = build_graph()

# Fetch the question list from the scoring service. A timeout keeps the
# script from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors here instead of letting an error
# payload be mistaken for the question list.
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
resp.raise_for_status()
questions = resp.json()

# Download the validation-split metadata and build a task_id -> final answer
# lookup from its 'task_id' and 'Final answer' columns.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename='2023/validation/metadata.parquet',
    repo_type='dataset',
    token=token,
)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))
# Q19: the 19th entry of the fetched list, i.e. zero-based index 18.
q = questions[18]
task_id, question = q['task_id'], q['question']
file_name = q.get('file_name')
ground_truth = answer_map.get(task_id, "NOT FOUND")

# Resolve the attachment (if the question has one) and tell the agent where
# the downloaded copy lives by appending the path to the question text.
resolved_path = file_extract(file_name, task_id) if file_name else None
if resolved_path:
    question = f"{question}\n\n[Attached File Local Path: {resolved_path}]"

# Show what is about to be sent to the agent.
print(f"Q19 File: {file_name}")
print(f"Resolved: {resolved_path}")
print(f"Q19 Question: {question[:100]}...")

# Run the agent on the single question; the last message holds its reply.
result = graph.invoke({"messages": [HumanMessage(content=question)]})
answer = result['messages'][-1].content

# Print ground truth and the (truncated) agent answer for a quick comparison.
print(f"GT: {ground_truth}")
print(f"Ans: {answer[:80]}")