import os
import re
import sys

import pandas as pd
import requests
from huggingface_hub import hf_hub_download

# Pull the current question set from the public scoring endpoint.
QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
print(f"Fetching questions from {QUESTIONS_URL}...")
try:
    # An explicit timeout prevents the script from hanging forever if the
    # endpoint is unreachable (requests has no default timeout).
    resp = requests.get(QUESTIONS_URL, timeout=30)
    resp.raise_for_status()
    current_questions = resp.json()
except Exception as e:
    # Best-effort: continue with an empty list so the rest of the script
    # still runs and simply reports zero matched answers.
    print(f"Error fetching questions: {e}")
    current_questions = []
|
def _load_simple_dotenv(path: str) -> None:
    """
    Load KEY=VALUE pairs from *path* into ``os.environ``.

    Deliberately minimal: blank lines, ``#`` comments, and anything that is
    not a plain KEY=VALUE entry are skipped, which avoids the parse warnings
    python-dotenv emits for non-standard .env lines. Existing environment
    variables are never overwritten.
    """
    if not os.path.exists(path):
        return

    pattern = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)\s*$")
    with open(path, "r", encoding="utf-8") as fh:
        for raw_line in fh:
            entry = raw_line.strip()
            if not entry or entry.startswith("#"):
                continue
            match = pattern.match(entry)
            if match is None:
                continue
            name = match.group(1)
            value = match.group(2)
            # Strip one layer of matching single or double quotes.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
                value = value[1:-1]
            # setdefault: real environment variables win over .env entries.
            os.environ.setdefault(name, value)
|
|
|
|
# Load KEY=VALUE pairs from a .env file sitting next to this script, if any.
_env_path = os.path.join(os.path.dirname(__file__), ".env")
_load_simple_dotenv(_env_path)

# Force UTF-8 (with replacement) on stdout so printing answers that contain
# non-ASCII characters cannot crash on narrow console encodings.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    # reconfigure() is unavailable on some stream wrappers; keep going.
    pass
|
# Location of the GAIA validation ground truth on the Hugging Face Hub.
GAIA_REPO_ID = "gaia-benchmark/GAIA"
GAIA_VAL_FILENAME = "2023/validation/metadata.parquet"
print(f"Fetching ground truth answers from HF dataset {GAIA_REPO_ID} ({GAIA_VAL_FILENAME})...")

# Accept either of the environment variable names commonly used for an HF
# access token (the GAIA dataset may be gated — see the error hint below).
hf_token = None
for _var in ("HUGGINGFACEHUB_API_TOKEN", "HF_TOKEN"):
    hf_token = os.getenv(_var)
    if hf_token:
        break
|
|
try:
    # Download (or reuse the cached copy of) the validation metadata parquet.
    local_parquet = hf_hub_download(
        repo_id=GAIA_REPO_ID,
        filename=GAIA_VAL_FILENAME,
        repo_type="dataset",
        token=hf_token,
    )
    gt = pd.read_parquet(local_parquet)

    # Locate the task-id column and whichever answer column this dump uses.
    id_col = "task_id" if "task_id" in gt.columns else None
    ans_col = next(
        (c for c in ("Final answer", "final_answer", "answer", "Final Answer") if c in gt.columns),
        None,
    )

    if not id_col or not ans_col:
        raise KeyError(
            f"Expected columns not found. Have columns: {list(gt.columns)[:30]}"
        )

    # task_id -> ground-truth answer, both coerced to str for safe lookups.
    truth_by_task = dict(zip(gt[id_col].astype(str), gt[ans_col].astype(str)))

    print("\n--- GAIA GROUND TRUTH ANSWERS (matched to scoring questions) ---")
    matched = 0
    total = len(current_questions)
    for idx, q in enumerate(current_questions, start=1):
        raw_id = q.get("task_id")
        task_id_str = "" if raw_id is None else str(raw_id)
        answer = truth_by_task.get(task_id_str)
        # astype(str) turns NaN into the literal "nan"; treat that as missing.
        ok = answer is not None and answer != "nan"
        if ok:
            matched += 1

        if task_id_str:
            task_preview = task_id_str[:8] + "..."
        else:
            task_preview = "MISSING"
        print(f"{idx}. [ID: {task_preview}] Answer: {answer if ok else 'NOT FOUND'}")
        question = q.get("question") or ""
        print(f"   Q: {question[:80]}...")
        print("-" * 20)

    print(f"\nMatched answers: {matched}/{total}")
    if total and matched != total:
        print("Some answers were NOT FOUND. This is usually an ID mismatch or missing HF access.")
except Exception as e:
    print(f"Error during matching: {e}")
    print("If the GAIA dataset is gated, ensure your HF token is set in HF_TOKEN or HUGGINGFACEHUB_API_TOKEN.")
    print("You can view the files at https://huggingface.co/datasets/gaia-benchmark/GAIA/tree/main/2023/validation")
|
|