Spaces:
Running
Running
timchen0618
Derive new_status from new_trajectory; fix sidebar check mark; fix question for incomplete
d14bce3 | #!/usr/bin/env python3 | |
| """ | |
| Patch the selected-tools test300 HF datasets to fill missing question fields | |
| from the BrowseComp JSONL. The eval files for some queries omit the 'question' | |
| field; this script fills them using query_id -> query from BrowseComp. | |
| Python env: /scratch/hc3337/envs/raca-py312/bin/python | |
| """ | |
| from __future__ import annotations | |
| import json, sys, os | |
| from pathlib import Path | |
# Default the Hugging Face cache location; an HF_HOME already present in the
# environment is left untouched.
if "HF_HOME" not in os.environ:
    os.environ["HF_HOME"] = "/scratch/hc3337/.cache/huggingface"

# BrowseComp JSONL that supplies the query_id -> query text used for patching.
BC_JSONL: Path = Path(
    "/scratch/hc3337/projects/BrowseComp-Plus/data/browsecomp_plus_decrypted_test300.jsonl"
)

# Repos whose names carry a model identifier.
_MODEL_REPOS = [
    "timchen0618/browsecomp-plus-sel-tools-test300-gpt-oss-120b-less-chars-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-gpt-oss-120b-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-gemini-2p5-pro-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-gemini-3p1-pro-v1",
]

# Repos whose names carry a random seed.
# NOTE(review): no "seed2" repo is listed — confirm the gap is intentional.
_RANDOM_SEED_REPOS = [
    "timchen0618/browsecomp-plus-sel-tools-test300-random-seed0-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-random-seed1-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-random-seed3-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-random-seed4-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-random-seed5-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-random-seed6-v1",
    "timchen0618/browsecomp-plus-sel-tools-test300-random-seed7-v1",
]

# Every Hub dataset repo to patch, in processing order.
REPOS = _MODEL_REPOS + _RANDOM_SEED_REPOS
def load_bc_questions(path: Path) -> dict:
    """Build a ``query_id -> question text`` lookup from a JSONL file.

    Every non-blank line is parsed as a JSON object. The text is read from
    the ``query`` key, falling back to ``question``. Records without a
    ``query_id`` or without non-empty text are ignored. IDs are coerced with
    ``int()``, so a later record with the same ID overwrites an earlier one.
    The number of loaded entries is reported on stderr.

    Args:
        path: JSONL file to read (decoded as UTF-8).

    Returns:
        Mapping from integer query ID to its question string.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        ValueError, TypeError: If a ``query_id`` cannot be converted by
            ``int()``.
    """
    questions: dict = {}
    with path.open("r", encoding="utf-8") as handle:
        # Strip each line first; lines that are empty afterwards are dropped.
        records = (json.loads(text) for text in map(str.strip, handle) if text)
        for record in records:
            query_id = record.get("query_id")
            question = record.get("query") or record.get("question") or ""
            if query_id is None or not question:
                continue
            questions[int(query_id)] = question
    print(f"Loaded {len(questions)} questions from {path}", file=sys.stderr)
    return questions
def patch_repo(repo: str, bc_questions: dict) -> None:
    """Fill empty ``question`` values in one Hub dataset and push the result.

    Loads the ``train`` split of *repo*. For every row whose ``question`` is
    missing or falsy, ``int(row["query_id"])`` is looked up in
    *bc_questions* and the row is filled when a non-empty question is found.
    If at least one row was filled, the rebuilt split is pushed back to the
    same repo; otherwise nothing is uploaded. Progress is written to stderr.

    Args:
        repo: Hub dataset repository ID to load from and push to.
        bc_questions: Mapping from integer query ID to question text.

    Raises:
        KeyError: If a row has no ``query_id`` field.
        ValueError, TypeError: If a row's ``query_id`` cannot be converted
            by ``int()``.
        Exception: Anything raised by ``datasets`` while loading or pushing.
    """
    # Deferred import: presumably so the module-level HF_HOME default is in
    # place before `datasets` is loaded — TODO confirm.
    from datasets import Dataset, load_dataset

    print(f"\nLoading {repo}...", file=sys.stderr)
    ds = load_dataset(repo, split="train")
    print(f" {len(ds)} rows, columns: {ds.column_names}", file=sys.stderr)

    rows = []
    filled = 0
    unmatched = 0
    for row in ds:
        r = dict(row)
        qid = int(r["query_id"])
        # Guarantee the key exists on every row. Dataset.from_list is
        # understood to derive its columns from the first row's keys, so a
        # dataset without a `question` column whose first row gets no match
        # would otherwise lose the column, and with it every filled value.
        r.setdefault("question", None)
        if not r["question"]:
            q = bc_questions.get(qid, "")
            if q:
                r["question"] = q
                filled += 1
            else:
                unmatched += 1
        rows.append(r)

    print(f" Filled {filled} missing questions from BrowseComp JSONL", file=sys.stderr)
    if unmatched:
        # Surface rows that stay empty so a partial fill is not mistaken for
        # a complete one.
        print(
            f" {unmatched} rows still have no question (query_id not in BrowseComp JSONL)",
            file=sys.stderr,
        )
    if filled == 0:
        print(" No changes needed — skipping push.", file=sys.stderr)
        return

    ds_new = Dataset.from_list(rows)
    ds_new.push_to_hub(
        repo,
        split="train",
        commit_message="Fill missing question fields from BrowseComp JSONL",
    )
    print(f" Pushed {len(rows)} rows to {repo}.", file=sys.stderr)
def main() -> int:
    """Patch every repo in ``REPOS`` and report the overall outcome.

    The BrowseComp questions are loaded once from ``BC_JSONL`` and reused for
    each repo. A failure in one repo is logged to stderr and does not stop
    the remaining repos from being processed; the failures are summarised at
    the end instead of being hidden behind an unconditional success message.

    Returns:
        ``0`` if every repo was processed without raising, ``1`` otherwise.
    """
    bc_questions = load_bc_questions(BC_JSONL)
    failed = []
    for repo in REPOS:
        try:
            patch_repo(repo, bc_questions)
        except Exception as e:
            # Best-effort across repos: record the failure and keep going.
            print(f"ERROR patching {repo}: {e}", file=sys.stderr)
            failed.append(repo)

    if failed:
        print(
            f"\nFINISHED WITH ERRORS: {len(failed)} of {len(REPOS)} repos failed",
            file=sys.stderr,
        )
        for repo in failed:
            print(f" {repo}", file=sys.stderr)
        return 1

    print("\nALL DONE", file=sys.stderr)
    return 0


if __name__ == "__main__":
    # Propagate the outcome so shells and schedulers can detect failed runs.
    sys.exit(main())