Spaces:

timchen0618
/

dashboard

Running

dashboard / patch_sel_tools_test300_questions.py

timchen0618

Derive new_status from new_trajectory; fix sidebar check mark; fix question for incomplete

d14bce3 6 days ago

3.18 kB

	#!/usr/bin/env python3
	"""
	Patch the selected-tools test300 HF datasets to fill missing question fields
	from the BrowseComp JSONL. The eval files for some queries omit the 'question'
	field; this script fills them using query_id -> query from BrowseComp.

	Python env: /scratch/hc3337/envs/raca-py312/bin/python
	"""
	from __future__ import annotations
	import json, sys, os
	from pathlib import Path

	# Default the Hugging Face cache location to scratch storage; an HF_HOME that
	# is already set in the environment is left untouched (setdefault).
	# This runs at import time, i.e. before patch_repo() imports `datasets`.
	os.environ.setdefault("HF_HOME", "/scratch/hc3337/.cache/huggingface")

	# BrowseComp-Plus JSONL file that main() passes to load_bc_questions().
	BC_JSONL = Path("/scratch/hc3337/projects/BrowseComp-Plus/data/browsecomp_plus_decrypted_test300.jsonl")

	# Hub dataset repo ids that main() patches, in this order.
	# NOTE(review): there is no "random-seed2" entry (seeds 0, 1, 3-7 only) —
	# confirm whether that omission is intentional.
	REPOS = [
	"timchen0618/browsecomp-plus-sel-tools-test300-gpt-oss-120b-less-chars-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-gpt-oss-120b-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-gemini-2p5-pro-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-gemini-3p1-pro-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-random-seed0-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-random-seed1-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-random-seed3-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-random-seed4-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-random-seed5-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-random-seed6-v1",
	"timchen0618/browsecomp-plus-sel-tools-test300-random-seed7-v1",
	]


	def load_bc_questions(path: Path) -> dict:
	    """Build a ``query_id -> question`` lookup from a JSONL file.

	    Each non-blank line is parsed as a JSON object. The question text is
	    taken from its ``"query"`` field, falling back to ``"question"``.
	    Records with no ``"query_id"`` or with empty text are skipped; when an
	    id repeats, the last record wins.

	    Args:
	        path: Location of the UTF-8 encoded JSONL file.

	    Returns:
	        Dict mapping ``int(query_id)`` to the question string.
	    """
	    questions: dict = {}
	    with path.open("r", encoding="utf-8") as handle:
	        stripped_lines = (raw.strip() for raw in handle)
	        # filter(None, ...) drops the lines that are empty after stripping.
	        for text_line in filter(None, stripped_lines):
	            record = json.loads(text_line)
	            query_id = record.get("query_id")
	            text = record.get("query") or record.get("question") or ""
	            if query_id is None or not text:
	                continue
	            questions[int(query_id)] = text
	    print(f"Loaded {len(questions)} questions from {path}", file=sys.stderr)
	    return questions


	def patch_repo(repo: str, bc_questions: dict) -> None:
	    """Fill empty ``question`` fields of one Hub dataset and push the result.

	    Loads the ``train`` split of *repo*. For every row whose ``question`` is
	    missing or empty, looks up ``int(row["query_id"])`` in *bc_questions* and
	    stores the match. The dataset is pushed back to the same repo only when
	    at least one row was filled.

	    Args:
	        repo: Hugging Face dataset repo id to patch in place.
	        bc_questions: Mapping of integer query id to question text.

	    Raises:
	        KeyError: If a row has no ``query_id`` field.
	        ValueError: If a ``query_id`` cannot be converted to ``int``.
	        Exception: Anything raised by ``load_dataset`` or ``push_to_hub``
	            propagates to the caller unchanged.
	    """
	    # Imported here rather than at module top; the module-level HF_HOME
	    # default has already been applied by the time this function runs.
	    from datasets import load_dataset, Dataset

	    print(f"\nLoading {repo}...", file=sys.stderr)
	    ds = load_dataset(repo, split="train")
	    print(f" {len(ds)} rows, columns: {ds.column_names}", file=sys.stderr)

	    rows = []
	    filled = 0
	    for row in ds:
	        r = dict(row)
	        qid = int(r["query_id"])
	        # Dataset.from_list derives its columns from the FIRST row's keys.
	        # If the dataset has no "question" column at all and the first row
	        # is not filled, questions added to later rows would be dropped
	        # silently. Giving every row the key keeps the column.
	        r.setdefault("question", None)
	        if not r["question"]:
	            q = bc_questions.get(qid, "")
	            if q:
	                r["question"] = q
	                filled += 1
	        rows.append(r)

	    print(f" Filled {filled} missing questions from BrowseComp JSONL", file=sys.stderr)
	    if filled == 0:
	        print(" No changes needed — skipping push.", file=sys.stderr)
	        return

	    ds_new = Dataset.from_list(rows)
	    ds_new.push_to_hub(
	        repo,
	        split="train",
	        commit_message="Fill missing question fields from BrowseComp JSONL",
	    )
	    print(f" Pushed {len(rows)} rows to {repo}.", file=sys.stderr)


	def main():
	    """Patch every repo in ``REPOS`` with questions loaded from ``BC_JSONL``.

	    A failure on one repo is reported on stderr and does not stop the
	    remaining repos from being processed.
	    """
	    question_by_id = load_bc_questions(BC_JSONL)
	    for repo_id in REPOS:
	        try:
	            patch_repo(repo_id, question_by_id)
	        except Exception as exc:
	            # Best effort: log the failure and move on to the next repo.
	            print(f"ERROR patching {repo_id}: {exc}", file=sys.stderr)
	    print("\nALL DONE", file=sys.stderr)


	# Run only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()