Spaces:
Running
Running
File size: 1,053 Bytes
"""Upload browsecomp-plus benchmark to HuggingFace for the dashboard visualizer."""
import json
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../packages/key_handler"))
from key_handler import KeyHandler
KeyHandler.set_env_key()
from datasets import Dataset
# Location of the decrypted benchmark, resolved relative to this script.
DATA_PATH = os.path.join(
    os.path.dirname(__file__),
    "../../../BrowseComp-Plus/data/browsecomp_plus_decrypted.jsonl",
)
# Target dataset repository on the Hugging Face Hub.
HF_REPO = "timchen0618/browsecomp-plus-benchmark"


def load_rows(path: str) -> list:
    """Read a JSONL benchmark file and return one flat record per example.

    Each non-blank line of *path* is parsed as a JSON object and reduced to
    five string-valued fields: ``query_id`` (coerced with ``str``), ``query``,
    ``answer``, and the JSON-encoded ``evidence_docs`` / ``gold_docs`` lists
    (an empty list is encoded when the key is absent).

    Args:
        path: Path to the ``.jsonl`` file to read.

    Returns:
        A list of dicts, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``query_id``, ``query`` or ``answer``.
    """
    rows = []
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing inside json.loads.
            if not line.strip():
                continue
            row = json.loads(line)
            rows.append({
                "query_id": str(row["query_id"]),
                "query": row["query"],
                "answer": row["answer"],
                # Nested document lists are stored as JSON text; presumably
                # the dashboard decodes them on read -- confirm with the
                # consumer before changing the column format.
                "evidence_docs": json.dumps(row.get("evidence_docs", [])),
                "gold_docs": json.dumps(row.get("gold_docs", [])),
            })
    return rows


def main() -> None:
    """Load the benchmark from ``DATA_PATH`` and push it to ``HF_REPO``."""
    rows = load_rows(DATA_PATH)
    print(f"Loaded {len(rows)} examples")

    ds = Dataset.from_list(rows)
    print("Dataset:", ds)

    # An unset or empty HF_TOKEN becomes None rather than "", so that no
    # empty credential is handed to push_to_hub and the Hub client can fall
    # back to its own stored login.
    token = os.environ.get("HF_TOKEN") or None
    ds.push_to_hub(HF_REPO, token=token)
    print(f"Uploaded to {HF_REPO}")


if __name__ == "__main__":
    main()
|