dashboard / patch_sel_tools_test300_questions.py
timchen0618
Derive new_status from new_trajectory; fix sidebar check mark; fix question for incomplete
d14bce3
#!/usr/bin/env python3
"""
Patch the selected-tools test300 HF datasets to fill missing question fields
from the BrowseComp JSONL. The eval files for some queries omit the 'question'
field; this script fills them using query_id -> query from BrowseComp.
Python env: /scratch/hc3337/envs/raca-py312/bin/python
"""
from __future__ import annotations
import json, sys, os
from pathlib import Path
# Default HF_HOME to a scratch-space directory; a value that is already present
# in the environment is left untouched (setdefault).
os.environ.setdefault("HF_HOME", "/scratch/hc3337/.cache/huggingface")
# BrowseComp JSONL file that supplies the query_id -> question text used to
# fill in missing questions.
BC_JSONL = Path("/scratch/hc3337/projects/BrowseComp-Plus/data/browsecomp_plus_decrypted_test300.jsonl")
# Hub dataset repos to patch; main() walks this list in order.
# NOTE(review): the random-seed entries go 0, 1, 3, 4, 5, 6, 7 — there is no
# seed2 repo listed. Confirm that omission is intentional.
REPOS = [
"timchen0618/browsecomp-plus-sel-tools-test300-gpt-oss-120b-less-chars-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-gpt-oss-120b-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-gemini-2p5-pro-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-gemini-3p1-pro-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-random-seed0-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-random-seed1-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-random-seed3-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-random-seed4-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-random-seed5-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-random-seed6-v1",
"timchen0618/browsecomp-plus-sel-tools-test300-random-seed7-v1",
]
def load_bc_questions(path: Path) -> dict:
    """Build an ``int(query_id) -> question text`` lookup from a JSONL file.

    Each non-blank line of *path* is parsed as one JSON object. The text is
    taken from its ``query`` field, falling back to ``question``. Records
    without a ``query_id`` or without any non-empty text are ignored; when an
    id occurs more than once the last record wins. The number of loaded
    entries is reported on stderr.

    Args:
        path: Location of the UTF-8 encoded JSONL file.

    Returns:
        Mapping of integer query id to question text.
    """
    questions_by_id = {}
    with path.open("r", encoding="utf-8") as handle:
        # Strip every line first so whitespace-only lines are dropped too.
        stripped_lines = map(str.strip, handle)
        records = (json.loads(text) for text in stripped_lines if text)
        for record in records:
            query_id = record.get("query_id")
            text = record.get("query") or record.get("question") or ""
            if query_id is None or not text:
                continue
            # Ids are normalised to int so "12" and 12 address the same entry.
            questions_by_id[int(query_id)] = text
    print(f"Loaded {len(questions_by_id)} questions from {path}", file=sys.stderr)
    return questions_by_id
def patch_repo(repo: str, bc_questions: dict) -> None:
    """Fill empty ``question`` fields of one Hub dataset and push the result.

    Loads the ``train`` split of *repo*. For every row whose ``question`` is
    missing or empty, ``int(query_id)`` is looked up in *bc_questions* and the
    text found there is written into the row. The dataset is pushed back to
    the same repo only when at least one row was changed. Progress, and the
    number of rows that could not be resolved, are reported on stderr.

    Args:
        repo: Hub dataset repo id to patch.
        bc_questions: Mapping of integer query id to question text.

    Raises:
        KeyError: If a row that needs patching has no ``query_id`` field.
        Exception: Errors from ``load_dataset`` / ``push_to_hub`` propagate
            unchanged to the caller.
    """
    # Local import: keeps `datasets` out of module import time.
    from datasets import load_dataset, Dataset

    print(f"\nLoading {repo}...", file=sys.stderr)
    ds = load_dataset(repo, split="train")
    print(f" {len(ds)} rows, columns: {ds.column_names}", file=sys.stderr)

    rows = []
    filled = 0
    unresolved = 0
    for row in ds:
        r = dict(row)
        # Give every row the key so the rebuilt table has a uniform column set
        # even if the source dataset had no "question" column at all.
        r.setdefault("question", None)
        if not r["question"]:
            # Parse the id only for rows that need it: a null or non-numeric
            # id on an already-complete row must not abort the whole repo.
            try:
                qid = int(r["query_id"])
            except (TypeError, ValueError):
                qid = None
            q = bc_questions.get(qid, "") if qid is not None else ""
            if q:
                r["question"] = q
                filled += 1
            else:
                unresolved += 1
        rows.append(r)

    print(f" Filled {filled} missing questions from BrowseComp JSONL", file=sys.stderr)
    if unresolved:
        print(
            f" {unresolved} rows still have no question (no usable match in BrowseComp JSONL)",
            file=sys.stderr,
        )
    if filled == 0:
        print(" No changes needed — skipping push.", file=sys.stderr)
        return

    # Re-use the loaded schema so the push does not alter column types through
    # re-inference. Only done when "question" is already a string column;
    # otherwise the filled-in text would not fit the declared type and the
    # schema is inferred from the rows as before.
    question_feature = ds.features.get("question")
    keep_schema = getattr(question_feature, "dtype", None) in ("string", "large_string")
    ds_new = Dataset.from_list(rows, features=ds.features if keep_schema else None)
    ds_new.push_to_hub(
        repo,
        split="train",
        commit_message="Fill missing question fields from BrowseComp JSONL",
    )
    print(f" Pushed {len(rows)} rows to {repo}.", file=sys.stderr)
def main() -> int:
    """Patch every repo in ``REPOS`` and report the overall outcome.

    The BrowseComp questions are loaded once and each repo is then patched in
    turn. A failure in one repo is logged to stderr and does not stop the
    remaining repos; the failed repos are summarised at the end.

    Returns:
        ``0`` when every repo was processed without error, ``1`` otherwise.
    """
    bc_questions = load_bc_questions(BC_JSONL)
    failed = []
    for repo in REPOS:
        try:
            patch_repo(repo, bc_questions)
        except Exception as e:
            # Best effort: one bad repo must not block the others, but the
            # failure is remembered so the run does not look like a success.
            print(f"ERROR patching {repo}: {e}", file=sys.stderr)
            failed.append(repo)
    print("\nALL DONE", file=sys.stderr)
    if failed:
        print(
            f"{len(failed)} of {len(REPOS)} repos failed: {', '.join(failed)}",
            file=sys.stderr,
        )
        return 1
    return 0


if __name__ == "__main__":
    # Propagate the outcome as the process exit status.
    sys.exit(main())