dashboard / fix_readme_features.py
timchen0618
Remove debug traceback from reload error; add fix_readme_features script
8026e0e
#!/usr/bin/env python3
"""
Fix the Hugging Face dataset README metadata for the 11 test300 selected-tools datasets.
An earlier push_to_hub() updated the parquet files (13 columns) but not the README (still 10 columns).
This script updates the YAML front matter in each README to include the 3 new columns.
"""
import os, sys
os.environ["HF_HOME"] = "/scratch/hc3337/.cache/huggingface"
from huggingface_hub import HfApi, DatasetCard
# Every target dataset lives under the same namespace/prefix and ends in "-v1";
# only the middle part (selector model or random seed) varies.
_REPO_PREFIX = "timchen0618/browsecomp-plus-sel-tools-test300"
_MODEL_VARIANTS = (
    "gpt-oss-120b",
    "gpt-oss-120b-less-chars",
    "gemini-2p5-pro",
    "gemini-3p1-pro",
)
# Note: seed 2 is not part of this set.
_RANDOM_SEEDS = (0, 1, 3, 4, 5, 6, 7)

# Full Hub repo ids of the datasets whose README is processed, model-based
# variants first, then the random-seed variants.
REPOS = [f"{_REPO_PREFIX}-{variant}-v1" for variant in _MODEL_VARIANTS] + [
    f"{_REPO_PREFIX}-random-seed{seed}-v1" for seed in _RANDOM_SEEDS
]
# Feature entries (name + dtype) to add to each README's feature list when
# they are not already present.
NEW_COLUMNS = [
    {"name": column_name, "dtype": column_dtype}
    for column_name, column_dtype in (
        ("question", "string"),
        ("correct_answer", "string"),
        ("correct", "bool"),
    )
]
# Not used by the visible script; kept so the module-level name stays available.
api = HfApi()

# Repos whose README could not be processed; decides the exit status below.
failed_repos = []

# For each repo: load the dataset card, append any NEW_COLUMNS entries missing
# from dataset_info.features, and push the card back only if something changed.
# A failure on one repo is reported and does not stop the remaining repos.
for repo in REPOS:
    print(f"Fixing {repo}...")
    try:
        card = DatasetCard.load(repo)
        features = card.data.get("dataset_info", {}).get("features", [])
        if not features:
            print(f" WARNING: no features found in card data for {repo}")
            # card.data is a CardData object, not a dict, and does not document
            # a keys() method; to_dict() is the supported way to list its fields.
            print(f" card.data keys: {list(card.data.to_dict().keys())}")
            continue

        existing_names = {feature["name"] for feature in features}
        added = []
        for col in NEW_COLUMNS:
            if col["name"] in existing_names:
                print(f" Column already present: {col['name']}")
                continue
            # Append a copy so the card metadata never aliases NEW_COLUMNS.
            features.append(dict(col))
            added.append(col["name"])
            print(f" Added column: {col['name']}")

        if not added:
            # Nothing changed: avoid pushing an identical README (no-op commit),
            # which also makes re-running the script safe.
            print(f" README already up to date for {repo}; skipping push")
            continue

        card.data["dataset_info"]["features"] = features
        card.push_to_hub(repo)
        print(f" Updated README for {repo}")
    except Exception as e:
        # Broad catch is deliberate: keep going with the remaining repos, but
        # remember the failure so the run does not look successful.
        failed_repos.append(repo)
        print(f" ERROR: {e}")
        import traceback

        traceback.print_exc()

print("Done!")
if failed_repos:
    # Non-zero exit so shells/CI can detect a partially failed run.
    sys.exit(f"{len(failed_repos)} repo(s) failed: {', '.join(failed_repos)}")