File size: 1,053 Bytes
5cc2a94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""Upload browsecomp-plus benchmark to HuggingFace for the dashboard visualizer."""
import json
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../packages/key_handler"))
from key_handler import KeyHandler
KeyHandler.set_env_key()

from datasets import Dataset

# Hub repository id that the dataset is pushed to.
HF_REPO = "timchen0618/browsecomp-plus-benchmark"

# The input JSONL is located relative to the directory containing this script,
# so the result does not depend on the current working directory.
_SCRIPT_DIR = os.path.dirname(__file__)
_RELATIVE_DATA_FILE = "../../../BrowseComp-Plus/data/browsecomp_plus_decrypted.jsonl"
DATA_PATH = os.path.join(_SCRIPT_DIR, _RELATIVE_DATA_FILE)

# Read the benchmark file (one JSON object per line) and keep only the fields
# that are uploaded. `query_id` is coerced to a string, and the two document
# lists are stored as JSON-encoded strings rather than nested values.
rows = []
# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform's locale default.
with open(DATA_PATH, encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. a trailing empty line), which json.loads
        # would otherwise reject with a JSONDecodeError.
        if not line.strip():
            continue
        row = json.loads(line)
        rows.append({
            "query_id": str(row["query_id"]),
            "query": row["query"],
            "answer": row["answer"],
            # Missing document lists default to an empty list ("[]").
            "evidence_docs": json.dumps(row.get("evidence_docs", [])),
            "gold_docs": json.dumps(row.get("gold_docs", [])),
        })

print(f"Loaded {len(rows)} examples")

# Build an in-memory dataset from the collected rows and show its summary.
ds = Dataset.from_list(rows)
print("Dataset:", ds)

# An unset or empty HF_TOKEN becomes None rather than "". Per the datasets
# documentation, omitting the token makes push_to_hub fall back to the
# credentials saved by a local Hugging Face login, whereas an empty string
# would be passed along as if it were a real token.
token = os.environ.get("HF_TOKEN") or None
ds.push_to_hub(HF_REPO, token=token)
print(f"Uploaded to {HF_REPO}")