# Final_Assignment_Template / find_gaia_answers.py
# Author: Paperbag
# Commit 3f4fc54: Refactor and add new debugging scripts; update question fetching logic
import os
import re
import sys
import pandas as pd
import requests
from huggingface_hub import hf_hub_download
# 1. Fetch current questions from the scoring space
QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"

print(f"Fetching questions from {QUESTIONS_URL}...")
try:
    # A finite timeout prevents the script from hanging forever when the
    # scoring space is unreachable — requests.get has NO default timeout.
    resp = requests.get(QUESTIONS_URL, timeout=30)
    resp.raise_for_status()
    # Expected shape: a JSON list of {"task_id": ..., "question": ...} dicts.
    current_questions = resp.json()
except Exception as e:
    # Best-effort fetch: downstream matching code tolerates an empty list.
    print(f"Error fetching questions: {e}")
    current_questions = []
def _load_simple_dotenv(path: str) -> None:
"""
Minimal .env loader that ignores non KEY=VALUE lines.
This avoids python-dotenv parse warnings for non-standard .env entries.
"""
if not os.path.exists(path):
return
key_re = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)\s*$")
with open(path, "r", encoding="utf-8") as f:
for raw in f:
line = raw.strip()
if not line or line.startswith("#"):
continue
m = key_re.match(line)
if not m:
continue
k, v = m.group(1), m.group(2)
if (len(v) >= 2) and ((v[0] == v[-1]) and v[0] in ("'", '"')):
v = v[1:-1]
os.environ.setdefault(k, v)
# Load .env if present, but tolerate invalid lines.
_env_file = os.path.join(os.path.dirname(__file__), ".env")
_load_simple_dotenv(_env_file)

# Avoid Windows console encoding crashes on Unicode characters.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    # Wrapped/redirected streams may not support reconfigure; carry on.
    pass
# 2. Fetch GAIA 2023 validation metadata from HF (Parquet)
GAIA_REPO_ID = "gaia-benchmark/GAIA"
GAIA_VAL_FILENAME = "2023/validation/metadata.parquet"

print(f"Fetching ground truth answers from HF dataset {GAIA_REPO_ID} ({GAIA_VAL_FILENAME})...")

# Token can be required for gated datasets
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

try:
    local_parquet = hf_hub_download(
        repo_id=GAIA_REPO_ID,
        filename=GAIA_VAL_FILENAME,
        repo_type="dataset",
        token=hf_token,
    )
    metadata = pd.read_parquet(local_parquet)

    # Build a map task_id -> answer with some tolerance to column naming.
    task_col = "task_id" if "task_id" in metadata.columns else None
    answer_col = next(
        (c for c in ("Final answer", "final_answer", "answer", "Final Answer")
         if c in metadata.columns),
        None,
    )
    if task_col is None or answer_col is None:
        raise KeyError(
            f"Expected columns not found. Have columns: {list(metadata.columns)[:30]}"
        )
    ground_truth = dict(
        zip(metadata[task_col].astype(str), metadata[answer_col].astype(str))
    )

    print("\n--- GAIA GROUND TRUTH ANSWERS (matched to scoring questions) ---")
    matched = 0
    total = len(current_questions)
    for idx, item in enumerate(current_questions):
        raw_id = item.get("task_id")
        tid = "" if raw_id is None else str(raw_id)
        gt_answer = ground_truth.get(tid)
        # astype(str) turns missing cells into the literal "nan"; treat as absent.
        hit = gt_answer is not None and gt_answer != "nan"
        matched += int(hit)
        preview = f"{tid[:8]}..." if tid else "MISSING"
        print(f"{idx+1}. [ID: {preview}] Answer: {gt_answer if hit else 'NOT FOUND'}")
        text = item.get("question") or ""
        print(f" Q: {text[:80]}...")
        print("-" * 20)

    print(f"\nMatched answers: {matched}/{total}")
    if total and matched != total:
        print("Some answers were NOT FOUND. This is usually an ID mismatch or missing HF access.")
except Exception as e:
    print(f"Error during matching: {e}")
    print("If the GAIA dataset is gated, ensure your HF token is set in HF_TOKEN or HUGGINGFACEHUB_API_TOKEN.")
    print("You can view the files at https://huggingface.co/datasets/gaia-benchmark/GAIA/tree/main/2023/validation")